Mercurial > repos > devteam > fastq_paired_end_joiner
changeset 0:d86b8db06e05 draft
Imported from capsule None
author | devteam |
---|---|
date | Thu, 23 Jan 2014 12:30:57 -0500 |
parents | |
children | ce853b881881 |
files | fastq_paired_end_joiner.py fastq_paired_end_joiner.xml test-data/3.fastqsanger test-data/split_pair_reads_1.fastqsanger test-data/split_pair_reads_2.fastqsanger tool_dependencies.xml |
diffstat | 6 files changed, 169 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# Source file: fastq_paired_end_joiner.py
#Dan Blankenberg
"""Join paired-end FASTQ reads from two files into single reads in one file.

Usage:
    fastq_paired_end_joiner.py <input1> <input1_type> <input2> <input2_type> <output>

An empty type argument falls back to 'sanger'.
"""
import sys
import os
import shutil
from galaxy_utils.sequence.fastq import fastqReader, fastqNamedReader, fastqWriter, fastqJoiner


def main():
    """Join every read of the first input with its mate from the second input.

    Reads the five positional command line arguments described in the module
    docstring. For each read in the first file, the paired identifier is
    computed by the joiner and looked up in the second file; when a mate is
    found the joined read is written to the output, otherwise the read is
    counted as skipped. A summary is printed to stdout at the end.

    Exits with a usage message if fewer than five arguments are supplied.
    """
    #Read command line arguments
    if len( sys.argv ) < 6:
        sys.exit( "Usage: %s <input1> <input1_type> <input2> <input2_type> <output>" % sys.argv[0] )
    input1_filename = sys.argv[1]
    input1_type = sys.argv[2] or 'sanger'
    input2_filename = sys.argv[3]
    input2_type = sys.argv[4] or 'sanger'
    output_filename = sys.argv[5]

    if input1_type != input2_type:
        print( "WARNING: You are trying to join files of two different types: %s and %s." % ( input1_type, input2_type ) )

    joiner = fastqJoiner( input1_type )

    # i stays None when the first input yields no reads at all.
    i = None
    skip_count = 0

    # Input handles are managed with 'with' so they are closed even on error.
    with open( input2_filename, 'rb' ) as input2_file:
        input2 = fastqNamedReader( input2_file, input2_type )
        out = fastqWriter( open( output_filename, 'wb' ), format = input1_type )
        try:
            with open( input1_filename, 'rb' ) as input1_file:
                for i, fastq_read in enumerate( fastqReader( input1_file, format = input1_type ) ):
                    identifier = joiner.get_paired_identifier( fastq_read )
                    fastq_paired = input2.get( identifier )
                    if fastq_paired is None:
                        skip_count += 1
                    else:
                        out.write( joiner.join( fastq_read, fastq_paired ) )
        finally:
            # Always close the writer, even if a read fails to parse.
            out.close()

        if i is None:
            print( "Your file contains no valid FASTQ reads." )
        else:
            # Kept inside the 'with' so the second input is still open when
            # the named reader is asked about remaining data.
            print( input2.has_data() )
            joined_count = i - skip_count + 1
            total_count = i + 1
            print( 'Joined %s of %s read pairs (%.2f%%).' % ( joined_count, total_count, float( joined_count ) / float( total_count ) * 100.0 ) )


if __name__ == "__main__":
    main()
<!-- Source file: fastq_paired_end_joiner.xml
     Galaxy tool definition wrapping fastq_paired_end_joiner.py. -->
<tool id="fastq_paired_end_joiner" name="FASTQ joiner" version="1.0.0">
  <description>on paired end reads</description>
  <requirements>
    <requirement type="package" version="1.0.0">galaxy_sequence_utils</requirement>
  </requirements>
  <!-- The 2nd and 4th arguments are each input's datatype extension with the
       leading "fastq" sliced off (e.g. "fastqsanger" gives "sanger"). -->
  <command interpreter="python">fastq_paired_end_joiner.py '$input1_file' '${input1_file.extension[len( 'fastq' ):]}' '$input2_file' '${input2_file.extension[len( 'fastq' ):]}' '$output_file'</command>
  <inputs>
    <param name="input1_file" type="data" format="fastqsanger,fastqcssanger" label="Left-hand Reads" />
    <param name="input2_file" type="data" format="fastqsanger,fastqcssanger" label="Right-hand Reads" />
  </inputs>
  <outputs>
    <!-- NOTE(review): format="input" does not say which input the output
         format follows; consider format_source="input1_file" - confirm
         against the Galaxy version this tool targets. -->
    <data name="output_file" format="input" />
  </outputs>
  <tests>
    <test>
      <param name="input1_file" value="split_pair_reads_1.fastqsanger" ftype="fastqsanger" />
      <param name="input2_file" value="split_pair_reads_2.fastqsanger" ftype="fastqsanger" />
      <output name="output_file" file="3.fastqsanger" />
    </test>
  </tests>
  <!-- Help text is reStructuredText. The citation URL brackets are written
       as &lt; and &gt; because a raw "<" is not allowed in XML character data. -->
  <help>
**What it does**

This tool joins paired end FASTQ reads from two separate files into a single read in one file. The join is performed using sequence identifiers, allowing the two files to contain differing ordering. If a sequence identifier does not appear in both files, it is excluded from the output.

Sequence identifiers with /1 and /2 appended override the left-hand and right-hand designation; i.e. if the reads end with /1 and /2, the read containing /1 will be used as the left-hand read and the read containing /2 will be used as the right-hand read. Sequences without this designation will follow the left-hand and right-hand settings set by the user.

-----

**Input formats**

Left-hand Read::

    @HWI-EAS91_1_30788AAXX:7:21:1542:1758/1
    GTCAATTGTACTGGTCAATACTAAAAGAATAGGATC
    +HWI-EAS91_1_30788AAXX:7:21:1542:1758/1
    hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh

Right-hand Read::

    @HWI-EAS91_1_30788AAXX:7:21:1542:1758/2
    GCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA
    +HWI-EAS91_1_30788AAXX:7:21:1542:1758/2
    hhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR

-----

**Output**

A multiple-fastq file, for example::

    @HWI-EAS91_1_30788AAXX:7:21:1542:1758
    GTCAATTGTACTGGTCAATACTAAAAGAATAGGATCGCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA
    +HWI-EAS91_1_30788AAXX:7:21:1542:1758
    hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR

------

**Citation**

If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. &lt;http://www.ncbi.nlm.nih.gov/pubmed/20562416&gt;`_


  </help>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/3.fastqsanger Thu Jan 23 12:30:57 2014 -0500 @@ -0,0 +1,20 @@ +@HWI-EAS91_1_30788AAXX:7:21:1542:1758 +GTCAATTGTACTGGTCAATACTAAAAGAATAGGATCGCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA ++HWI-EAS91_1_30788AAXX:7:21:1542:1758 +hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR +@HWI-EAS91_1_30788AAXX:7:22:1621:462 +ATAATGGCTATTATTGTGGGGGGGATGATGCTGGAAACTAGCCCCAATATCAATCCTATATCAAATCTCACC ++HWI-EAS91_1_30788AAXX:7:22:1621:462 +hhhhhhhhhhhhQAhh@hhhhNhhhfhMbCIScC?hhJhhhhChhhJhhhRhhKhePhc\KhhV\KhXhJhh +@HWI-EAS91_1_30788AAXX:7:45:408:807 +TACCCGATTTTTTGCTTTCCACTTTATCCTACCCTTATGAGTGCTAGGATCAGGATGGAGAGGATTAGGGCT ++HWI-EAS91_1_30788AAXX:7:45:408:807 +hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hhhZh`hhhhhRXhhYh +@HWI-EAS91_1_30788AAXX:7:49:654:1439 +CTAACTCTATTTATTGTATTTCAACTAAAAATCTCATAGGTTTATTGATAGTTGTGTTGTTGGTGTAAATGG ++HWI-EAS91_1_30788AAXX:7:49:654:1439 +hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhdhh_hG\XhU@ +@HWI-EAS91_1_30788AAXX:7:64:947:234 +TATCAAAAAAGAATATAATCTGAATCAACACTACAACCTATTAGTGTGTAGAATAGGAAGTAGAGGCCTGCG ++HWI-EAS91_1_30788AAXX:7:64:947:234 +hhhhhhhhhhhhhhhhhhhhhhhRhhehhahhhhhJhhhhhhhh^hPhWfhhhhThWUhhfhh_hhNIVPUd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/split_pair_reads_1.fastqsanger Thu Jan 23 12:30:57 2014 -0500 @@ -0,0 +1,20 @@ +@HWI-EAS91_1_30788AAXX:7:21:1542:1758/1 +GTCAATTGTACTGGTCAATACTAAAAGAATAGGATC ++HWI-EAS91_1_30788AAXX:7:21:1542:1758/1 +hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +@HWI-EAS91_1_30788AAXX:7:22:1621:462/1 +ATAATGGCTATTATTGTGGGGGGGATGATGCTGGAA ++HWI-EAS91_1_30788AAXX:7:22:1621:462/1 +hhhhhhhhhhhhQAhh@hhhhNhhhfhMbCIScC?h +@HWI-EAS91_1_30788AAXX:7:45:408:807/1 +TACCCGATTTTTTGCTTTCCACTTTATCCTACCCTT ++HWI-EAS91_1_30788AAXX:7:45:408:807/1 +hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +@HWI-EAS91_1_30788AAXX:7:49:654:1439/1 +CTAACTCTATTTATTGTATTTCAACTAAAAATCTCA ++HWI-EAS91_1_30788AAXX:7:49:654:1439/1 +hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +@HWI-EAS91_1_30788AAXX:7:64:947:234/1 +TATCAAAAAAGAATATAATCTGAATCAACACTACAA ++HWI-EAS91_1_30788AAXX:7:64:947:234/1 +hhhhhhhhhhhhhhhhhhhhhhhRhhehhahhhhhJ
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/split_pair_reads_2.fastqsanger Thu Jan 23 12:30:57 2014 -0500 @@ -0,0 +1,20 @@ +@HWI-EAS91_1_30788AAXX:7:21:1542:1758/2 +GCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA ++HWI-EAS91_1_30788AAXX:7:21:1542:1758/2 +hhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR +@HWI-EAS91_1_30788AAXX:7:22:1621:462/2 +ACTAGCCCCAATATCAATCCTATATCAAATCTCACC ++HWI-EAS91_1_30788AAXX:7:22:1621:462/2 +hJhhhhChhhJhhhRhhKhePhc\KhhV\KhXhJhh +@HWI-EAS91_1_30788AAXX:7:45:408:807/2 +ATGAGTGCTAGGATCAGGATGGAGAGGATTAGGGCT ++HWI-EAS91_1_30788AAXX:7:45:408:807/2 +hhhhhhhhhhhhhhhhhh`hhhZh`hhhhhRXhhYh +@HWI-EAS91_1_30788AAXX:7:49:654:1439/2 +TAGGTTTATTGATAGTTGTGTTGTTGGTGTAAATGG ++HWI-EAS91_1_30788AAXX:7:49:654:1439/2 +hhhhhhhhhhhhhhhhhhhhhhhhhdhh_hG\XhU@ +@HWI-EAS91_1_30788AAXX:7:64:947:234/2 +CCTATTAGTGTGTAGAATAGGAAGTAGAGGCCTGCG ++HWI-EAS91_1_30788AAXX:7:64:947:234/2 +hhhhhhhh^hPhWfhhhhThWUhhfhh_hhNIVPUd
<?xml version="1.0"?>
<!-- Source file: tool_dependencies.xml
     Declares the galaxy_sequence_utils 1.0.0 package dependency and the
     repository (name, owner, changeset revision, tool shed URL) that
     provides it. The package name and version match the <requirement>
     element in fastq_paired_end_joiner.xml. -->
<tool_dependency>
    <package name="galaxy_sequence_utils" version="1.0.0">
        <!-- NOTE(review): the toolshed URL names a "testtoolshed" host;
             confirm this is the intended tool shed for this dependency. -->
        <repository changeset_revision="195699b1562a" name="package_galaxy_utils_1_0" owner="devteam" prior_installation_required="False" toolshed="http://testtoolshed.g2.bx.psu.edu" />
    </package>
</tool_dependency>