Mercurial > repos > devteam > fastq_paired_end_interlacer
changeset 1:e0a8fba7ed2f draft
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/galaxy_sequence_utils/fastq_paired_end_interlacer commit f2582539542b33240234e8ea6093e25d0aee9b6a
author | devteam |
---|---|
date | Sat, 30 Sep 2017 14:57:07 -0400 |
parents | cfc3ad769dba |
children | 1adeef975783 |
files | fastq_paired_end_interlacer.py fastq_paired_end_interlacer.xml tool_dependencies.xml |
diffstat | 3 files changed, 79 insertions(+), 99 deletions(-) [+] |
line wrap: on
line diff
--- a/fastq_paired_end_interlacer.py Thu Jan 23 12:31:16 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,59 +0,0 @@ -#Florent Angly -import sys -from galaxy_utils.sequence.fastq import fastqReader, fastqWriter, fastqNamedReader, fastqJoiner - -def main(): - mate1_filename = sys.argv[1] - mate1_type = sys.argv[2] or 'sanger' - mate2_filename = sys.argv[3] - mate2_type = sys.argv[4] or 'sanger' - outfile_pairs = sys.argv[5] - outfile_singles = sys.argv[6] - - if mate1_type != mate2_type: - print "WARNING: You are trying to interlace files of two different types: %s and %s." % ( mate1_type, mate2_type ) - return - - type = mate1_type - joiner = fastqJoiner( type ) - out_pairs = fastqWriter( open( outfile_pairs, 'wb' ), format = type ) - out_singles = fastqWriter( open( outfile_singles, 'wb' ), format = type ) - - # Pairs + singles present in mate1 - nof_singles = 0 - nof_pairs = 0 - mate2_input = fastqNamedReader( open( mate2_filename, 'rb' ), format = type ) - i = None - for i, mate1 in enumerate( fastqReader( open( mate1_filename, 'rb' ), format = type ) ): - mate2 = mate2_input.get( joiner.get_paired_identifier( mate1 ) ) - if mate2: - out_pairs.write( mate1 ) - out_pairs.write( mate2 ) - nof_pairs += 1 - else: - out_singles.write( mate1 ) - nof_singles += 1 - - # Singles present in mate2 - mate1_input = fastqNamedReader( open( mate1_filename, 'rb' ), format = type ) - j = None - for j, mate2 in enumerate( fastqReader( open( mate2_filename, 'rb' ), format = type ) ): - mate1 = mate1_input.get( joiner.get_paired_identifier( mate2 ) ) - if not mate1: - out_singles.write( mate2 ) - nof_singles += 1 - - if (i is None) and (j is None): - print "Your input files contained no valid FASTQ sequences." - else: - print 'There were %s single reads.' % ( nof_singles ) - print 'Interlaced %s pairs of sequences.' % ( nof_pairs ) - - mate1_input.close() - mate2_input.close() - out_pairs.close() - out_singles.close() - - -if __name__ == "__main__": - main()
--- a/fastq_paired_end_interlacer.xml Thu Jan 23 12:31:16 2014 -0500 +++ b/fastq_paired_end_interlacer.xml Sat Sep 30 14:57:07 2017 -0400 @@ -1,35 +1,78 @@ -<tool id="fastq_paired_end_interlacer" name="FASTQ interlacer" version="1.1"> - <description>on paired end reads</description> - <requirements> - <requirement type="package" version="1.0.0">galaxy_sequence_utils</requirement> - </requirements> - <command interpreter="python">fastq_paired_end_interlacer.py '$input1_file' '${input1_file.extension[len( 'fastq' ):]}' '$input2_file' '${input2_file.extension[len( 'fastq' ):]}' '$outfile_pairs' '$outfile_singles'</command> - <inputs> - <param name="input1_file" type="data" format="fastqsanger,fastqcssanger" label="Left-hand mates" /> - <param name="input2_file" type="data" format="fastqsanger,fastqcssanger" label="Right-hand mates" /> - </inputs> - <outputs> - <!-- $input1_file.name = filename , e.g. paired_end_2_errors.fastqsanger --> - <!-- $input1_file.id = ID , e.g. 10 --> - <!-- $input1_file.hid = history ID, e.g. 
5 --> - <data name="outfile_pairs" format="input" label="FASTQ interlacer pairs from data ${input1_file.hid} and data ${input2_file.hid}"/> - <data name="outfile_singles" format="input" label="FASTQ interlacer singles from data ${input1_file.hid} and data ${input2_file.hid}"/> - </outputs> - <tests> - <test> - <param name="input1_file" value="paired_end_1.fastqsanger" ftype="fastqsanger" /> - <param name="input2_file" value="paired_end_2.fastqsanger" ftype="fastqsanger" /> - <output name="outfile_pairs" file="paired_end_merged.fastqsanger" /> - <output name="outfile_singles" file="paired_end_merged_singles.fastqsanger" /> - </test> - <test> - <param name="input1_file" value="paired_end_1_errors.fastqsanger" ftype="fastqsanger" /> - <param name="input2_file" value="paired_end_2_errors.fastqsanger" ftype="fastqsanger" /> - <output name="outfile_pairs" file="paired_end_merged_cleaned.fastqsanger" /> - <output name="outfile_singles" file="paired_end_merged_cleaned_singles.fastqsanger" /> - </test> - </tests> - <help> +<tool id="fastq_paired_end_interlacer" name="FASTQ interlacer" version="1.2.0"> + <description>on paired end reads</description> + <requirements> + <requirement type="package" version="1.1.1">galaxy_sequence_utils</requirement> + </requirements> + <command><![CDATA[ +gx-fastq-paired-end-interlacer +#if $reads.reads_selector == 'paired' + '${reads.input1_file}' ${reads.input1_file.extension[len('fastq'):]} '${reads.input2_file}' ${reads.input2_file.extension[len('fastq'):]} + '$outfile_pairs' '$outfile_singles' +#else + '${reads.reads_coll.forward}' ${reads.reads_coll.forward.extension[len('fastq'):]} '${reads.reads_coll.reverse}' ${reads.reads_coll.reverse.extension[len('fastq'):]} + '$outfile_pairs_from_coll' '$outfile_singles_from_coll' +#end if + ]]></command> + <inputs> + <conditional name="reads"> + <param name="reads_selector" type="select" label="Type of paired-end datasets"> + <option value="paired">2 separate datasets</option> + <option 
value="paired_collection">1 paired dataset collection</option> + </param> + <when value="paired"> + <param name="input1_file" type="data" format="fastqsanger,fastqcssanger,fastqsanger.gz,fastqcssanger.gz,fastqsanger.bz2,fastqcssanger.bz2" label="Left-hand mates" /> + <param name="input2_file" type="data" format="fastqsanger,fastqcssanger,fastqsanger.gz,fastqcssanger.gz,fastqsanger.bz2,fastqcssanger.bz2" label="Right-hand mates" /> + </when> + <when value="paired_collection"> + <param name="reads_coll" type="data_collection" collection_type="paired" format="fastqsanger,fastqcssanger,fastqsanger.gz,fastqcssanger.gz,fastqsanger.bz2,fastqcssanger.bz2" label="Paired-end reads collection" /> + </when> + </conditional> + </inputs> + <outputs> + <!-- $input1_file.name = filename , e.g. paired_end_2_errors.fastqsanger --> + <!-- $input1_file.id = ID , e.g. 10 --> + <!-- $input1_file.hid = history ID, e.g. 5 --> + <data name="outfile_pairs" format_source="input1_file" label="FASTQ interlacer pairs from ${on_string}"> + <filter>reads['reads_selector'] == 'paired'</filter> + </data> + <data name="outfile_singles" format_source="input1_file" label="FASTQ interlacer singles from ${on_string}"> + <filter>reads['reads_selector'] == 'paired'</filter> + </data> + <data name="outfile_pairs_from_coll" format_source="reads_coll['forward']" label="FASTQ interlacer pairs from ${on_string}"> + <filter>reads['reads_selector'] == 'paired_collection'</filter> + </data> + <data name="outfile_singles_from_coll" format_source="reads_coll['forward']" label="FASTQ interlacer singles from ${on_string}"> + <filter>reads['reads_selector'] == 'paired_collection'</filter> + </data> + </outputs> + <tests> + <test> + <param name="reads_selector" value="paired" /> + <param name="input1_file" value="paired_end_1.fastqsanger" ftype="fastqsanger" /> + <param name="input2_file" value="paired_end_2.fastqsanger" ftype="fastqsanger" /> + <output name="outfile_pairs" file="paired_end_merged.fastqsanger" 
ftype="fastqsanger" /> + <output name="outfile_singles" file="paired_end_merged_singles.fastqsanger" ftype="fastqsanger" /> + </test> + <test> + <param name="reads_selector" value="paired" /> + <param name="input1_file" value="paired_end_1_errors.fastqsanger" ftype="fastqsanger" /> + <param name="input2_file" value="paired_end_2_errors.fastqsanger" ftype="fastqsanger" /> + <output name="outfile_pairs" file="paired_end_merged_cleaned.fastqsanger" ftype="fastqsanger" /> + <output name="outfile_singles" file="paired_end_merged_cleaned_singles.fastqsanger" ftype="fastqsanger" /> + </test> + <test> + <param name="reads_selector" value="paired_collection" /> + <param name="reads_coll"> + <collection type="paired"> + <element name="forward" value="paired_end_1.fastqsanger" ftype="fastqsanger" /> + <element name="reverse" value="paired_end_2.fastqsanger" ftype="fastqsanger" /> + </collection> + </param> + <output name="outfile_pairs_from_coll" file="paired_end_merged.fastqsanger" ftype="fastqsanger" /> + <output name="outfile_singles_from_coll" file="paired_end_merged_singles.fastqsanger" ftype="fastqsanger" /> + </test> + </tests> + <help><![CDATA[ **What it does** This tool joins paired end FASTQ reads from two separate files, one with the left mates and one with the right mates, into a single file where left mates alternate with their right mates. The join is performed using sequence identifiers, allowing the two files to contain differing ordering. If a sequence identifier does not appear in both files, it is included in a separate file. @@ -70,6 +113,8 @@ WNUUZ\P^`BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB A multiple-fastq file containing reads that have no mate is also produced. - - </help> + ]]></help> + <citations> + <citation type="doi">10.1093/bioinformatics/btq281</citation> + </citations> </tool>
--- a/tool_dependencies.xml Thu Jan 23 12:31:16 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="galaxy_sequence_utils" version="1.0.0"> - <repository changeset_revision="195699b1562a" name="package_galaxy_utils_1_0" owner="devteam" prior_installation_required="False" toolshed="http://testtoolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency>