Mercurial > repos > devteam > fastq_groomer
diff fastq_groomer.py @ 0:06c42572d7c0 draft
Imported from capsule None
author | devteam |
---|---|
date | Thu, 23 Jan 2014 12:31:44 -0500 |
parents | |
children |
line wrap: on
line diff
#Dan Blankenberg
import sys
from galaxy_utils.sequence.fastq import fastqReader, fastqVerboseErrorReader, fastqAggregator, fastqWriter


def main():
    """Read a FASTQ file, rewrite it in the requested format, and print a summary.

    Command-line arguments (all positional, read from ``sys.argv``):
        1. input filename
        2. input FASTQ type, passed to the reader as ``format``
        3. output filename
        4. output FASTQ type, passed to the writer as ``format``
        5. forced quality encoding; the literal string ``'None'`` means
           "do not force" and is converted to ``None``
        6. the literal string ``'summarize_input'`` enables the input summary;
           any other value disables it

    Every read yielded by the reader is written to the output. When the
    summary is enabled, each read is also fed to a ``fastqAggregator`` and the
    valid formats plus the ASCII/decimal ranges it reports are printed.

    Raises:
        IndexError: if fewer than six arguments are supplied.
        Any exception raised by the reader or writer propagates to the caller;
        both file handles are closed first.
    """
    input_filename = sys.argv[1]
    input_type = sys.argv[2]
    output_filename = sys.argv[3]
    output_type = sys.argv[4]
    force_quality_encoding = sys.argv[5]
    summarize_input = sys.argv[6] == 'summarize_input'
    if force_quality_encoding == 'None':
        force_quality_encoding = None

    aggregator = fastqAggregator()
    # NOTE(review): the verbose reader is presumably chosen so that bad input
    # produces more detailed errors when summarizing -- confirm in galaxy_utils.
    reader = fastqVerboseErrorReader if summarize_input else fastqReader

    # The output is opened before the input, as in the original, so the output
    # file is created/truncated even when the input cannot be opened.
    out = fastqWriter( open( output_filename, 'wb' ), format = output_type, force_quality_encoding = force_quality_encoding )
    # Stays None when the reader yields nothing; otherwise holds the zero-based
    # index of the last read processed.
    read_count = None
    try:
        # 'with' guarantees the input handle is released even if parsing fails.
        with open( input_filename ) as input_file:
            for read_count, fastq_read in enumerate( reader( input_file, format = input_type, apply_galaxy_conventions = True ) ):
                if summarize_input:
                    aggregator.consume_read( fastq_read )
                out.write( fastq_read )
    finally:
        # Always close the writer so the output handle is not leaked when a
        # read fails to parse or to be written.
        out.close()

    # Single-argument parenthesised print produces identical output under the
    # Python 2 print statement and the Python 3 print function.
    if read_count is not None:
        print( "Groomed %i %s reads into %s reads." % ( read_count + 1, input_type, output_type ) )
        if input_type != output_type and 'solexa' in [ input_type, output_type ]:
            print( "Converted between Solexa and PHRED scores." )
        if summarize_input:
            print( "Based upon quality and sequence, the input data is valid for: %s" % ( ", ".join( aggregator.get_valid_formats() ) or "None" ) )
            ascii_range = aggregator.get_ascii_range()
            decimal_range = aggregator.get_decimal_range()
            #print using repr, since \x00 (null) causes info truncation in galaxy when printed
            print( "Input ASCII range: %s(%i) - %s(%i)" % ( repr( ascii_range[0] ), ord( ascii_range[0] ), repr( ascii_range[1] ), ord( ascii_range[1] ) ) )
            print( "Input decimal range: %i - %i" % ( decimal_range[0], decimal_range[1] ) )
    else:
        print( "No valid FASTQ reads were provided." )


if __name__ == "__main__":
    main()