Mercurial > repos > devteam > fastq_paired_end_interlacer

--- a/fastq_paired_end_interlacer.py	Thu Jan 23 12:31:16 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,59 +0,0 @@
-#Florent Angly
-import sys
-from galaxy_utils.sequence.fastq import fastqReader, fastqWriter, fastqNamedReader, fastqJoiner
-
-def main():
-    mate1_filename   = sys.argv[1]
-    mate1_type       = sys.argv[2] or 'sanger'
-    mate2_filename   = sys.argv[3]
-    mate2_type       = sys.argv[4] or 'sanger'
-    outfile_pairs    = sys.argv[5]
-    outfile_singles = sys.argv[6]
-
-    if mate1_type != mate2_type:
-        print "WARNING: You are trying to interlace files of two different types: %s and %s." % ( mate1_type, mate2_type )
-        return
-
-    type = mate1_type
-    joiner = fastqJoiner( type )
-    out_pairs = fastqWriter( open( outfile_pairs, 'wb' ), format = type )
-    out_singles = fastqWriter( open( outfile_singles, 'wb' ), format = type )
-
-    # Pairs + singles present in mate1
-    nof_singles = 0
-    nof_pairs   = 0
-    mate2_input = fastqNamedReader( open( mate2_filename, 'rb' ), format = type )
-    i = None
-    for i, mate1 in enumerate( fastqReader( open( mate1_filename, 'rb' ), format = type ) ):
-        mate2 = mate2_input.get( joiner.get_paired_identifier( mate1 ) )
-        if mate2:
-            out_pairs.write( mate1 )
-            out_pairs.write( mate2 )
-            nof_pairs += 1
-        else:
-            out_singles.write( mate1 )
-            nof_singles += 1
-
-    # Singles present in mate2
-    mate1_input = fastqNamedReader( open( mate1_filename, 'rb' ), format = type )
-    j = None
-    for j, mate2 in enumerate( fastqReader( open( mate2_filename, 'rb' ), format = type ) ):
-        mate1 = mate1_input.get( joiner.get_paired_identifier( mate2 ) )
-        if not mate1:
-            out_singles.write( mate2 )
-            nof_singles += 1
-
-    if (i is None) and (j is None):
-        print "Your input files contained no valid FASTQ sequences."
-    else:
-        print 'There were %s single reads.' % ( nof_singles )
-        print 'Interlaced %s pairs of sequences.' % ( nof_pairs )
-
-    mate1_input.close()
-    mate2_input.close()
-    out_pairs.close()
-    out_singles.close()
-
-
-if __name__ == "__main__":
-    main()
--- a/fastq_paired_end_interlacer.xml	Thu Jan 23 12:31:16 2014 -0500
+++ b/fastq_paired_end_interlacer.xml	Sat Sep 30 14:57:07 2017 -0400
@@ -1,35 +1,78 @@
-<tool id="fastq_paired_end_interlacer" name="FASTQ interlacer" version="1.1">
-  <description>on paired end reads</description>
-  <requirements>
-    <requirement type="package" version="1.0.0">galaxy_sequence_utils</requirement>
-  </requirements>
-  <command interpreter="python">fastq_paired_end_interlacer.py '$input1_file' '${input1_file.extension[len( 'fastq' ):]}' '$input2_file' '${input2_file.extension[len( 'fastq' ):]}' '$outfile_pairs' '$outfile_singles'</command>
-  <inputs>
-    <param name="input1_file" type="data" format="fastqsanger,fastqcssanger" label="Left-hand mates" />
-    <param name="input2_file" type="data" format="fastqsanger,fastqcssanger" label="Right-hand mates" />
-  </inputs>
-  <outputs>
-    <!-- $input1_file.name = filename  , e.g. paired_end_2_errors.fastqsanger -->
-    <!-- $input1_file.id   = ID        , e.g. 10 -->
-    <!-- $input1_file.hid  = history ID, e.g. 5  -->
-    <data name="outfile_pairs"   format="input" label="FASTQ interlacer pairs from data ${input1_file.hid} and data ${input2_file.hid}"/>
-    <data name="outfile_singles" format="input" label="FASTQ interlacer singles from data ${input1_file.hid} and data ${input2_file.hid}"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input1_file" value="paired_end_1.fastqsanger" ftype="fastqsanger" />
-      <param name="input2_file" value="paired_end_2.fastqsanger" ftype="fastqsanger" />
-      <output name="outfile_pairs" file="paired_end_merged.fastqsanger" />
-      <output name="outfile_singles" file="paired_end_merged_singles.fastqsanger" />
-    </test>
-    <test>
-      <param name="input1_file" value="paired_end_1_errors.fastqsanger" ftype="fastqsanger" />
-      <param name="input2_file" value="paired_end_2_errors.fastqsanger" ftype="fastqsanger" />
-      <output name="outfile_pairs" file="paired_end_merged_cleaned.fastqsanger" />
-      <output name="outfile_singles" file="paired_end_merged_cleaned_singles.fastqsanger" />
-    </test>
-  </tests>
-  <help>
+<tool id="fastq_paired_end_interlacer" name="FASTQ interlacer" version="1.2.0">
+    <description>on paired end reads</description>
+    <requirements>
+        <requirement type="package" version="1.1.1">galaxy_sequence_utils</requirement>
+    </requirements>
+    <command><![CDATA[
+gx-fastq-paired-end-interlacer
+#if $reads.reads_selector == 'paired'
+    '${reads.input1_file}' ${reads.input1_file.extension[len('fastq'):]} '${reads.input2_file}' ${reads.input2_file.extension[len('fastq'):]}
+    '$outfile_pairs' '$outfile_singles'
+#else
+    '${reads.reads_coll.forward}' ${reads.reads_coll.forward.extension[len('fastq'):]} '${reads.reads_coll.reverse}' ${reads.reads_coll.reverse.extension[len('fastq'):]}
+    '$outfile_pairs_from_coll' '$outfile_singles_from_coll'
+#end if
+    ]]></command>
+    <inputs>
+        <conditional name="reads">
+            <param name="reads_selector" type="select" label="Type of paired-end datasets">
+                <option value="paired">2 separate datasets</option>
+                <option value="paired_collection">1 paired dataset collection</option>
+            </param>
+            <when value="paired">
+                <param name="input1_file" type="data" format="fastqsanger,fastqcssanger,fastqsanger.gz,fastqcssanger.gz,fastqsanger.bz2,fastqcssanger.bz2" label="Left-hand mates" />
+                <param name="input2_file" type="data" format="fastqsanger,fastqcssanger,fastqsanger.gz,fastqcssanger.gz,fastqsanger.bz2,fastqcssanger.bz2" label="Right-hand mates" />
+            </when>
+            <when value="paired_collection">
+                <param name="reads_coll" type="data_collection" collection_type="paired" format="fastqsanger,fastqcssanger,fastqsanger.gz,fastqcssanger.gz,fastqsanger.bz2,fastqcssanger.bz2" label="Paired-end reads collection" />
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- $input1_file.name = filename  , e.g. paired_end_2_errors.fastqsanger -->
+        <!-- $input1_file.id   = ID        , e.g. 10 -->
+        <!-- $input1_file.hid  = history ID, e.g. 5  -->
+        <data name="outfile_pairs" format_source="input1_file" label="FASTQ interlacer pairs from ${on_string}">
+            <filter>reads['reads_selector'] == 'paired'</filter>
+        </data>
+        <data name="outfile_singles" format_source="input1_file" label="FASTQ interlacer singles from ${on_string}">
+            <filter>reads['reads_selector'] == 'paired'</filter>
+        </data>
+        <data name="outfile_pairs_from_coll" format_source="reads_coll['forward']" label="FASTQ interlacer pairs from ${on_string}">
+            <filter>reads['reads_selector'] == 'paired_collection'</filter>
+        </data>
+        <data name="outfile_singles_from_coll" format_source="reads_coll['forward']" label="FASTQ interlacer singles from ${on_string}">
+            <filter>reads['reads_selector'] == 'paired_collection'</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="reads_selector" value="paired" />
+            <param name="input1_file" value="paired_end_1.fastqsanger" ftype="fastqsanger" />
+            <param name="input2_file" value="paired_end_2.fastqsanger" ftype="fastqsanger" />
+            <output name="outfile_pairs" file="paired_end_merged.fastqsanger" ftype="fastqsanger" />
+            <output name="outfile_singles" file="paired_end_merged_singles.fastqsanger" ftype="fastqsanger" />
+        </test>
+        <test>
+            <param name="reads_selector" value="paired" />
+            <param name="input1_file" value="paired_end_1_errors.fastqsanger" ftype="fastqsanger" />
+            <param name="input2_file" value="paired_end_2_errors.fastqsanger" ftype="fastqsanger" />
+            <output name="outfile_pairs" file="paired_end_merged_cleaned.fastqsanger" ftype="fastqsanger" />
+            <output name="outfile_singles" file="paired_end_merged_cleaned_singles.fastqsanger" ftype="fastqsanger" />
+        </test>
+        <test>
+            <param name="reads_selector" value="paired_collection" />
+            <param name="reads_coll">
+                <collection type="paired">
+                    <element name="forward" value="paired_end_1.fastqsanger" ftype="fastqsanger" />
+                    <element name="reverse" value="paired_end_2.fastqsanger" ftype="fastqsanger" />
+                </collection>
+            </param>
+            <output name="outfile_pairs_from_coll" file="paired_end_merged.fastqsanger" ftype="fastqsanger" />
+            <output name="outfile_singles_from_coll" file="paired_end_merged_singles.fastqsanger" ftype="fastqsanger" />
+        </test>
+    </tests>
+    <help><![CDATA[
 **What it does**

 This tool joins paired-end FASTQ reads from two separate files, one with the left mates and one with the right mates, into a single file in which left mates alternate with their right mates. The join is performed using sequence identifiers, so the two files may list their reads in different orders. If a sequence identifier does not appear in both files, its read is written to a separate file.
@@ -70,6 +113,8 @@
     WNUUZ\P^`BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB

 A multiple-fastq file containing reads that have no mate is also produced.
-
-  </help>
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btq281</citation>
+    </citations>
 </tool>
--- a/tool_dependencies.xml	Thu Jan 23 12:31:16 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-  <package name="galaxy_sequence_utils" version="1.0.0">
-      <repository changeset_revision="195699b1562a" name="package_galaxy_utils_1_0" owner="devteam" prior_installation_required="False" toolshed="http://testtoolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>