comparison htseq-count.xml @ 5:0a835934d792

Version 0.3
author lparsons
date Tue, 05 Mar 2013 12:26:28 -0500
parents 359d40333595
children 08a11d1eaec6
comparison
equal deleted inserted replaced
4:359d40333595 5:0a835934d792
1 <tool id="htseq_count" name="htseq-count" version="0.1"> 1 <tool id="htseq_count" name="htseq-count" version="0.3">
2 <description> - Count aligned reads in a BAM file that overlap features in a GFF file</description> 2 <description> - Count aligned reads in a BAM file that overlap features in a GFF file</description>
3 <version_command>htseq-count -h | grep version | sed 's/^\(.*\)*\(version .*\)\./\2/'</version_command> 3 <version_command>htseq-count -h | grep version | sed 's/^\(.*\)*\(version .*\)\./\2/'</version_command>
4 <requirements> 4 <requirements>
5 <requirement type="package" version="1.6.2">numpy</requirement>
5 <requirement type="package" version="0.5.3p9">htseq</requirement> 6 <requirement type="package" version="0.5.3p9">htseq</requirement>
6 <requirement type="package" version="0.1.18">samtools</requirement> 7 <requirement type="package" version="0.1.18">samtools</requirement>
8 <requirement type="package" version="1.56.0">picard</requirement>
7 </requirements> 9 </requirements>
8 <command> 10 <command>
9 ##set up input files 11 ##set up input files
10 #set $reference_fasta_filename = "localref.fa" 12 #set $reference_fasta_filename = "localref.fa"
11 #if $samout_conditional.samout: 13 #if $samout_conditional.samout:
14 samtools faidx "${reference_fasta_filename}" 2&gt;&amp;1 || echo "Error running samtools faidx for htseq-count" &gt;&amp;2 &amp;&amp; 16 samtools faidx "${reference_fasta_filename}" 2&gt;&amp;1 || echo "Error running samtools faidx for htseq-count" &gt;&amp;2 &amp;&amp;
15 #else: 17 #else:
16 #set $reference_fasta_filename = str( $samout_conditional.reference_source.ref_file.fields.path ) 18 #set $reference_fasta_filename = str( $samout_conditional.reference_source.ref_file.fields.path )
17 #end if 19 #end if
18 #end if 20 #end if
19 21 #if str($singlepaired) == "paired":
20 #if $samfile.extension == "bam": 22 ln -s $samfile local_input.sam &amp;&amp;
21 samtools view $samfile | 23 java -Xmx2G -jar "\$JAVA_JAR_PATH/SortSam.jar" VALIDATION_STRINGENCY=LENIENT SORT_ORDER=queryname O=prepared_input.sam I=local_input.sam TMP_DIR="${__new_file_path__}"
24 || echo "Error running Picard MergeSamFiles" &gt;&amp;2 &amp;&amp;
25 #else:
26 #if $samfile.extension == "bam":
27 samtools view $samfile |
28 #else
29 ln -s $samfile prepared_input.sam &amp;&amp;
30 #end if
22 #end if 31 #end if
23 htseq-count 32 htseq-count
24 --mode=$mode 33 --mode=$mode
25 --stranded=$stranded 34 --stranded=$stranded
26 --minaqual=$minaqual 35 --minaqual=$minaqual
27 --type=$type 36 --type=$featuretype
28 --idattr=$idattr 37 --idattr=$idattr
29 #if $samout_conditional.samout: 38 #if $samout_conditional.samout:
30 --samout=$__new_file_path__/${samoutfile.id}_tmp 39 --samout=$__new_file_path__/${samoutfile.id}_tmp
31 #end if 40 #end if
32 #if $samfile.extension == "bam": 41 #if str($singlepaired) == "paired":
33 - 42 prepared_input.sam
34 #else 43 #else:
35 $samfile 44 #if $samfile.extension == "bam":
36 #end if 45 -
46 #else:
47 prepared_input.sam
48 #end if
49 #end if
37 $gfffile 50 $gfffile
38 &gt; $counts 51 | awk '{if ($1 ~ "no_feature|ambiguous|too_low_aQual|not_aligned|alignment_not_unique") print $0 | "cat 1>&amp;2"; else print $0}' &gt; $counts 2&gt;$othercounts
39 #if $samout_conditional.samout: 52 #if $samout_conditional.samout:
40 &amp;&amp; samtools view -Su -t ${reference_fasta_filename}.fai $__new_file_path__/${samoutfile.id}_tmp | samtools sort -o - sorted > $samoutfile 53 &amp;&amp; samtools view -Su -t ${reference_fasta_filename}.fai $__new_file_path__/${samoutfile.id}_tmp | samtools sort -o - sorted > $samoutfile
41 #end if</command> 54 #end if</command>
42 <inputs> 55 <inputs>
43 <param format="sam, bam" name="samfile" type="data" label="Aligned SAM File"> 56 <param format="sam, bam" name="samfile" type="data" label="Aligned SAM/BAM File"/>
44 <help>Paired-End data must be sorted by QUERY NAME, use Picard Read Mate Fixer and Query name sort order before using this tool on paired data</help> 57 <param name="singlepaired" type="select" label="Is this library mate-paired?">
58 <help>Paired libraries will be sorted by read name prior to counting.</help>
59 <option value="single" selected="true">single-end</option>
60 <option value="paired">paired-end</option>
45 </param> 61 </param>
46 <param format="gff" name="gfffile" type="data" label="GFF File"/> 62 <param format="gff" name="gfffile" type="data" label="GFF File"/>
47 <param name="mode" type="select" label="Mode"> 63 <param name="mode" type="select" label="Mode">
48 <help>Mode to handle reads overlapping more than one feature.</help> 64 <help>Mode to handle reads overlapping more than one feature.</help>
49 <option value="union" selected="true">Union</option> 65 <option value="union" selected="true">Union</option>
57 <option value="reverse">Reverse</option> 73 <option value="reverse">Reverse</option>
58 </param> 74 </param>
59 <param name="minaqual" type="integer" value="0" label="Minimum alignment quality"> 75 <param name="minaqual" type="integer" value="0" label="Minimum alignment quality">
60 <help>Skip all reads with alignment quality lower than the given minimum value</help> 76 <help>Skip all reads with alignment quality lower than the given minimum value</help>
61 </param> 77 </param>
62 <param name="type" type="text" value="exon" label="Feature type"> 78 <param name="featuretype" type="text" value="exon" label="Feature type">
63 <help>Feature type (3rd column in GFF file) to be used. All features of other types are ignored. The default, suitable for RNA-Seq and Ensembl GTF files, is exon.</help> 79 <help>Feature type (3rd column in GFF file) to be used. All features of other types are ignored. The default, suitable for RNA-Seq and Ensembl GTF files, is exon.</help>
64 </param> 80 </param>
65 <param name="idattr" type="text" value="gene_id" label="ID Attribute"> 81 <param name="idattr" type="text" value="gene_id" label="ID Attribute">
66 <help>GFF attribute to be used as feature ID. Several GFF lines with the same feature ID will be considered as parts of the same feature. The feature ID is used to identify the counts in the output table. The default, suitable for RNA-Seq and Ensembl GTF files, is gene_id.</help> 82 <help>GFF attribute to be used as feature ID. Several GFF lines with the same feature ID will be considered as parts of the same feature. The feature ID is used to identify the counts in the output table. All features of the specified type MUST have a value for this attribute. The default, suitable for RNA-Seq and Ensembl GTF files, is gene_id.</help>
67 </param> 83 </param>
68 <conditional name="samout_conditional"> 84 <conditional name="samout_conditional">
69 <param name="samout" type="boolean" value="False" truevalue="True" falsevalue="False" label="Additional BAM Output"> 85 <param name="samout" type="boolean" value="False" truevalue="True" falsevalue="False" label="Additional BAM Output">
70 <help>Write out all SAM alignment records into an output BAM file, annotating each line with its assignment to a feature or a special counter (as an optional field with tag ‘XF’).</help> 86 <help>Write out all SAM alignment records into an output BAM file, annotating each line with its assignment to a feature or a special counter (as an optional field with tag ‘XF’).</help>
71 </param> 87 </param>
76 <option value="history">History</option> 92 <option value="history">History</option>
77 </param> 93 </param>
78 <when value="cached"> 94 <when value="cached">
79 <param name="ref_file" type="select" label="Using reference genome"> 95 <param name="ref_file" type="select" label="Using reference genome">
80 <options from_data_table="sam_fa_indexes"> 96 <options from_data_table="sam_fa_indexes">
81 <filter type="data_meta" key="dbkey" ref="samfile" column="3"/> 97 <filter type="data_meta" key="dbkey" ref="samfile" column="1"/>
82 </options> 98 </options>
83 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> 99 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
84 </param> 100 </param>
85 </when> 101 </when>
86 <when value="history"> <!-- FIX ME!!!! --> 102 <when value="history"> <!-- FIX ME!!!! -->
90 </when> 106 </when>
91 </conditional> 107 </conditional>
92 </inputs> 108 </inputs>
93 109
94 <outputs> 110 <outputs>
95 <data format="tabular" name="counts" label="${tool.name} on ${on_string}"/> 111 <data format="tabular" name="counts" metadata_source="samfile" label="${tool.name} on ${on_string}"/>
96 <data format="bam" name="samoutfile" label="${tool.name} on ${on_string} (BAM)"> 112 <data format="tabular" name="othercounts" metadata_source="samfile" label="${tool.name} on ${on_string} (no feature)"/>
113 <data format="bam" name="samoutfile" metadata_source="samfile" label="${tool.name} on ${on_string} (BAM)">
97 <filter>samout_conditional['samout']</filter> 114 <filter>samout_conditional['samout']</filter>
98 </data> 115 </data>
99 </outputs> 116 </outputs>
100 117
101 <stdio> 118 <stdio>
102 <exit_code range="1:" level="fatal" description="Unknown error occurred" /> 119 <exit_code range="1:" level="fatal" description="Unknown error occurred" />
120 <regex match="htseq-count: command not found" source="stderr" level="fatal" description="The HTSeq python package is not properly installed, contact Galaxy administrators" />
121 <regex match="samtools: command not found" source="stderr" level="fatal" description="The samtools package is not properly installed, contact Galaxy administrators" />
122 <regex match="Error: Feature (.+) does not contain a '(.+)' attribute" source="both" level="fatal" description="Error parsing the GFF file, at least one feature of the specified 'Feature type' does not have a value for the specified 'ID Attribute'" />
123 <regex match="Error occured in line (\d+) of file" source="stderr" level="fatal" description="Unknown error parsing the GFF file" />
124 <regex match="Error" source="stderr" level="fatal" description="Unknown error occurred" />
125 <regex match="Warning: Read (.+) claims to have an aligned mate which could not be found. \(Is the SAM file properly sorted\?\)" source="stderr" level="warning" description="PAIRED DATA MISSING OR NOT PROPERLY SORTED. Try rerunning and selecting the paired-end option. See stderr output of this dataset for more information." />
103 </stdio> 126 </stdio>
104 127
105 <tests> 128 <tests>
106 <test> 129 <test>
107 <param name="samfile" value="htseq-test.sam" /> 130 <param name="samfile" value="htseq-test.sam" />
108 <param name="gfffile" value="htseq-test.gff" /> 131 <param name="gfffile" value="htseq-test.gff" />
109 <param name="samout" value="False" /> 132 <param name="samout" value="False" />
110 <output name="counts" file="htseq-test_counts.tsv" /> 133 <output name="counts" file="htseq-test_counts.tsv" />
134 <output name="othercounts" file="htseq-test_othercounts.tsv" />
111 </test> 135 </test>
112 <test> 136 <test>
113 <param name="samfile" value="htseq-test.bam" /> 137 <param name="samfile" value="htseq-test.bam" />
114 <param name="gfffile" value="htseq-test.gff" /> 138 <param name="gfffile" value="htseq-test.gff" />
115 <param name="samout" value="False" /> 139 <param name="samout" value="False" />
116 <output name="counts" file="htseq-test_counts.tsv" /> 140 <output name="counts" file="htseq-test_counts.tsv" />
141 <output name="othercounts" file="htseq-test_othercounts.tsv" />
142 </test>
143 <test>
144 <param name="samfile" value="htseq-test-paired.bam" />
145 <param name="singlepaired" value="paired" />
146 <param name="gfffile" value="htseq-test.gff" />
147 <param name="samout" value="False" />
148 <output name="counts" file="htseq-test-paired_counts.tsv" />
149 <output name="othercounts" file="htseq-test-paired_othercounts.tsv" />
117 </test> 150 </test>
118 <!-- Seems to be an issue setting the $reference_fasta_filename variable during test 151 <!-- Seems to be an issue setting the $reference_fasta_filename variable during test
119 <test> 152 <test>
120 <param name="samfile" value="htseq-test.sam" /> 153 <param name="samfile" value="htseq-test.sam" />
121 <param name="gfffile" value="htseq-test.gff" /> 154 <param name="gfffile" value="htseq-test.gff" />
122 <param name="samout" value="True" /> 155 <param name="samout" value="True" />
123 <param name="reference_source_selector" value="history" /> 156 <param name="reference_source_selector" value="history" />
124 <param name="ref_file" value="htseq-test_reference.fasta" /> 157 <param name="ref_file" value="htseq-test_reference.fasta" />
125 <output name="counts" file="htseq-test_counts.tsv" /> 158 <output name="counts" file="htseq-test_counts.tsv" />
159 <output name="othercounts" file="htseq-test_othercounts.tsv" />
126 <output name="samoutfile" file="htseq-test_samout.bam" /> 160 <output name="samoutfile" file="htseq-test_samout.bam" />
127 </test> 161 </test>
128 --> 162 -->
129 </tests> 163 </tests>
130 164