annotate make_families.xml @ 3:13bcc2f459b0 draft

planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
author nick
date Mon, 23 Nov 2015 18:07:11 -0500
parents 71ace43428ca
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
1 <?xml version="1.0"?>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
2 <tool id="make_families" name="Make families" version="0.1">
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
3 <description>from duplex sequencing data</description>
3
13bcc2f459b0 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents: 2
diff changeset
4 <requirements>
13bcc2f459b0 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents: 2
diff changeset
5 <requirement type="package" version="0.1">duplex</requirement>
13bcc2f459b0 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents: 2
diff changeset
6 <!-- <requirement type="set_environment">DUPLEX_DIR</requirement> -->
13bcc2f459b0 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents: 2
diff changeset
7 </requirements>
0
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
8 <!-- Pipeline: put the two mate files side by side, fold every four lines into one row (presumably one FASTQ record per mate), let make-barcodes.awk process each row using the tag length and invariant length passed as awk variables, sort the rows, and write them to the output dataset. Dataset paths are single-quoted so the shell treats each one as a single word. --> <command>paste '$fastq1' '$fastq2'
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
9 | paste - - - -
2
71ace43428ca planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents: 1
diff changeset
10 | awk -f \$DUPLEX_DIR/make-barcodes.awk -v TAG_LEN=$taglen -v INVARIANT=$invariant
0
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
11 | sort
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
12 &gt; '$output'
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
13 </command>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
14 <inputs>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
15 <param name="fastq1" type="data" format="fastq" label="Sequencing reads, mate 1"/>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
16 <param name="fastq2" type="data" format="fastq" label="Sequencing reads, mate 2"/>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
17 <param name="taglen" type="integer" value="12" min="0" label="Tag length" help="length of each random barcode on the ends of the fragments"/>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
18 <param name="invariant" type="integer" value="5" min="0" label="Invariant sequence length" help="length of the sequence between the tag and actual sample sequence (the restriction site, normally)"/>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
19 </inputs>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
20 <outputs>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
21 <data name="output" format="tabular"/>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
22 </outputs>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
23 <tests>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
24 <test>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
25 <param name="fastq1" value="smoke_1.fq"/>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
26 <param name="fastq2" value="smoke_2.fq"/>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
27 <param name="taglen" value="5"/>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
28 <param name="invariant" value="1"/>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
29 <output name="output" file="smoke.families.tsv"/>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
30 </test>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
31 <test>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
32 <param name="fastq1" value="smoke_1.fq"/>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
33 <param name="fastq2" value="smoke_2.fq"/>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
34 <param name="taglen" value="5"/>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
35 <param name="invariant" value="0"/>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
36 <output name="output" file="smoke.families.i0.tsv"/>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
37 </test>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
38 </tests>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
39 <help>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
40
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
41 **What it does**
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
42
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
43 This tool processes raw duplex sequencing data: it removes the barcodes from the reads and uses them to group the reads into families that originate from the same fragment.
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
44
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
45 -----
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
46
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
47 **Output**
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
48
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
49 The output will be a tabular file where each line corresponds to a pair of input reads.
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
50
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
51 The columns are::
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
52
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
53 1: barcode (both tags joined and ordered)
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
54 2: tag order in barcode ("ab" or "ba")
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
55 3: read1 name
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
56 4: read1 sequence (minus the tag and invariant sequences)
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
57 5: read1 quality scores (minus the same tag and invariant)
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
58 6: read2 name
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
59 7: read2 sequence (minus the tag and invariant sequences)
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
60 8: read2 quality scores (minus the same tag and invariant)
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
61
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
62 -----
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
63
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
64 **Barcode creation**
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
65
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
66 For each pair, the tool will remove the tag at the beginning of each read and create a barcode by concatenating the two tags. The order of the tags within the barcode is determined by a string comparison, so the same barcode is produced no matter which read carried which tag. The original tag order will be noted in the second column.
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
67
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
68 Since pairs from opposite strands will have the same tags, but in the reverse order, this produces the same barcode for reads from the same fragment, regardless of strand. Then a simple sort will group all reads from the same fragment together, separated into strands by the different "order" values.
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
69
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
70 Examples::
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
71
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
72 +---------------+-----------------+
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
73 |   input tags  |      output     |
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
74 +-------+-------+-------+---------+
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
75 | read1 | read2 | order | barcode |
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
76 +-------+-------+-------+---------+
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
77 |  ATG  |  CCT  |  ab   | ATGCCT  |
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
78 +-------+-------+-------+---------+
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
79 |  CCT  |  ATG  |  ba   | ATGCCT  |
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
80 +-------+-------+-------+---------+
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
81
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
82 </help>
4633a25d8c19 planemo upload commit 801bf168032a13f6405518bddb35a24c9e9a8cd4-dirty
nick
parents:
diff changeset
83 </tool>