# HG changeset patch # User devteam # Date 1506794026 14400 # Node ID a63d1aa8e8297a0c026bc512991474faf59b0b69 # Parent 9616277d3d393b6364a113a8322945e7dc72150a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/galaxy_sequence_utils/fastq_to_tabular commit f2582539542b33240234e8ea6093e25d0aee9b6a diff -r 9616277d3d39 -r a63d1aa8e829 fastq_to_tabular.py --- a/fastq_to_tabular.py Fri Dec 18 19:30:48 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,38 +0,0 @@ -#Dan Blankenberg -import sys -from galaxy_utils.sequence.fastq import fastqReader - -def stop_err( msg ): - sys.stderr.write( msg ) - sys.exit() - -def main(): - if len(sys.argv) != 5: - stop_err("Wrong number of arguments. Expect: fasta tabular desrc_split [type]") - input_filename = sys.argv[1] - output_filename = sys.argv[2] - descr_split = int( sys.argv[3] ) - 1 - if descr_split < 0: - stop_err("Bad description split value (should be 1 or more)") - input_type = sys.argv[4] or 'sanger' #input type should ordinarily be unnecessary - - num_reads = None - fastq_read = None - out = open( output_filename, 'wb' ) - if descr_split == 0: - #Don't divide the description into multiple columns - for num_reads, fastq_read in enumerate( fastqReader( open( input_filename ), format = input_type ) ): - out.write( "%s\t%s\t%s\n" % ( fastq_read.identifier[1:].replace( '\t', ' ' ), fastq_read.sequence.replace( '\t', ' ' ), fastq_read.quality.replace( '\t', ' ' ) ) ) - else: - for num_reads, fastq_read in enumerate( fastqReader( open( input_filename ), format = input_type ) ): - words = fastq_read.identifier[1:].replace( '\t', ' ' ).split(None, descr_split) - #pad with empty columns if required - words += [""]*(descr_split-len(words)) - out.write( "%s\t%s\t%s\n" % ("\t".join(words), fastq_read.sequence.replace( '\t', ' ' ), fastq_read.quality.replace( '\t', ' ' ) ) ) - out.close() - if num_reads is None: - print "No valid FASTQ reads could be processed." - else: - print "%i FASTQ reads were converted to Tabular." % ( num_reads + 1 ) - -if __name__ == "__main__": main() diff -r 9616277d3d39 -r a63d1aa8e829 fastq_to_tabular.xml --- a/fastq_to_tabular.xml Fri Dec 18 19:30:48 2015 -0500 +++ b/fastq_to_tabular.xml Sat Sep 30 13:53:46 2017 -0400 @@ -1,40 +1,45 @@ - - converter - - galaxy_sequence_utils - - fastq_to_tabular.py '$input_file' '$output_file' $descr_columns '${input_file.extension[len( 'fastq' ):]}' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + converter + + galaxy_sequence_utils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + <;!!! @FSRRS4401BRRTC [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74] tcagCCAGCAATTCCGACTTAATTGTTCTTCTTCCATCATTCATCTCGACTAACAGTTCTACGATTAATGAGTTTGGCtt taatttgttgttcattattgtcacaattacactactgagactgccaaggcacncagggataggnn +FSRRS4401BRRTC [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74] - FFFFFFFFFDDDDFFFFGFDDDDBAAAAA=<4444@@B=555:BBBBB@@?8:8<?<89898<84442;==3,,,514,, - ,11,,,.,,21777555513,..--1115758.//34488><<;;;;9944/!/4,,,57855!! + FFFFFFFFFDDDDFFFFGFDDDDBAAAAA=<4444@@B=555:BBBBB@@?8:8<<;;;;9944/!/4,,,57855!! By default this is converted into a 3 column tabular file, with the full FASTQ title used as column 1: @@ -92,13 +97,8 @@ ============== ============ ========== =========== ============= ============== =================== ============== ============== Note the sequences and quality strings have been truncated for display purposes in the above tables. - ------- - - - - - 10.1093/bioinformatics/btq281 - - + ]]> + + 10.1093/bioinformatics/btq281 + diff -r 9616277d3d39 -r a63d1aa8e829 test-data/sanger_full_range_original_sanger.fastqsanger.gz Binary file test-data/sanger_full_range_original_sanger.fastqsanger.gz has changed diff -r 9616277d3d39 -r a63d1aa8e829 tool_dependencies.xml --- a/tool_dependencies.xml Fri Dec 18 19:30:48 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ - - - - - -