Mercurial > repos > lparsons > htseq_count
comparison htseq-count.xml @ 19:8bc865a85024 draft
planemo upload for repository https://github.com/lparsons/galaxy_tools/tree/master/tools/htseq_count commit e53f827acbedaec2da3a44488fb6628c9e922055
| author | lparsons |
|---|---|
| date | Mon, 22 Jun 2015 14:14:17 -0400 |
| parents | 737cda668bdd |
| children | 6b61ba6dd9e2 |
comparison
equal
deleted
inserted
replaced
| 18:737cda668bdd | 19:8bc865a85024 |
|---|---|
| 11 <regex match="htseq-count: (command ){0,1}not found" source="stderr" level="fatal" description="The HTSeq python package is not properly installed, contact Galaxy administrators" /> | 11 <regex match="htseq-count: (command ){0,1}not found" source="stderr" level="fatal" description="The HTSeq python package is not properly installed, contact Galaxy administrators" /> |
| 12 <regex match="samtools: (command ){0,1}not found" source="stderr" level="fatal" description="The samtools package is not properly installed, contact Galaxy administrators" /> | 12 <regex match="samtools: (command ){0,1}not found" source="stderr" level="fatal" description="The samtools package is not properly installed, contact Galaxy administrators" /> |
| 13 <regex match="Error: Feature (.+) does not contain a '(.+)' attribute" source="both" level="fatal" description="Error parsing the GFF file, at least one feature of the specified 'Feature type' does not have a value for the specified 'ID Attribute'" /> | 13 <regex match="Error: Feature (.+) does not contain a '(.+)' attribute" source="both" level="fatal" description="Error parsing the GFF file, at least one feature of the specified 'Feature type' does not have a value for the specified 'ID Attribute'" /> |
| 14 <regex match="Error occured in line (\d+) of file" source="stderr" level="fatal" description="Unknown error parsing the GFF file" /> | 14 <regex match="Error occured in line (\d+) of file" source="stderr" level="fatal" description="Unknown error parsing the GFF file" /> |
| 15 <regex match="Error" source="stderr" level="fatal" description="Unknown error occured" /> | 15 <regex match="Error" source="stderr" level="fatal" description="Unknown error occured" /> |
| 16 <regex match="Warning: Read (.+) claims to have an aligned mate which could not be found. \(Is the SAM file properly sorted\?\)" source="stderr" level="warning" description="PAIRED DATA MISSING OR NOT PROPERLY SORTED. Try reruning and selecting the paired-end option. See stderr output of this dataset for more information." /> | 16 <regex match="Warning: Read (.+) claims to have an aligned mate which could not be found. \(Is the SAM file properly sorted\?\)" source="stderr" level="warning" description="PAIRED DATA MISSING OR NOT PROPERLY SORTED. Try reruning and selecting the option to 'Force sorting of SAM/BAM file by NAME'. See stderr output of this dataset for more information." /> |
| 17 </stdio> | 17 </stdio> |
| 18 | 18 |
| 19 <version_command>htseq-count -h | grep version | sed 's/^\(.*\)*\(version .*\)\./\2/'</version_command> | 19 <version_command>htseq-count -h | grep version | sed 's/^\(.*\)*\(version .*\)\./\2/'</version_command> |
| 20 | 20 |
| 21 <command> | 21 <command><![CDATA[ |
| 22 ##set up input files | 22 ##set up input files |
| 23 #set $reference_fasta_filename = "localref.fa" | 23 #set $reference_fasta_filename = "localref.fa" |
| 24 #if $samout_conditional.samout: | 24 #if $samout_conditional.samout: |
| 25 #if str( $samout_conditional.reference_source.reference_source_selector ) == "history": | 25 #if str( $samout_conditional.reference_source.reference_source_selector ) == "history": |
| 26 ln -s "${samout_conditional.reference_source.ref_file}" "${reference_fasta_filename}" && | 26 ln -s "${samout_conditional.reference_source.ref_file}" "${reference_fasta_filename}" && |
| 27 samtools faidx "${reference_fasta_filename}" 2>&1 || echo "Error running samtools faidx for htseq-count" >&2 && | 27 samtools faidx "${reference_fasta_filename}" 2>&1 || echo "Error running samtools faidx for htseq-count" >&2 && |
| 28 #else: | 28 #else: |
| 29 #set $reference_fasta_filename = str( $samout_conditional.reference_source.ref_file.fields.path ) | 29 #set $reference_fasta_filename = str( $samout_conditional.reference_source.ref_file.fields.path ) |
| 30 #end if | 30 #end if |
| 31 #end if | 31 #end if |
| 32 #if $force_sort: | |
| 33 #if $samfile.extension == 'bam': | |
| 34 samtools sort -n $samfile "name_sorted_alignment" && | |
| 35 #else | |
| 36 samtools view -Su -t ${reference_fasta_filename}.fai $samfile | samtools sort -n - "name_sorted_alignment" && | |
| 37 #end if | |
| 38 #end if | |
| 32 htseq-count | 39 htseq-count |
| 33 --format=$samfile.extension | |
| 34 --order=pos | |
| 35 --mode=$mode | 40 --mode=$mode |
| 36 --stranded=$stranded | 41 --stranded=$stranded |
| 37 --minaqual=$minaqual | 42 --minaqual=$minaqual |
| 38 --type=$featuretype | 43 --type=$featuretype |
| 39 --idattr=$idattr | 44 --idattr=$idattr |
| 40 #if $samout_conditional.samout: | 45 #if $samout_conditional.samout: |
| 41 --samout=$__new_file_path__/${samoutfile.id}_tmp | 46 --samout=$__new_file_path__/${samoutfile.id}_tmp |
| 42 #end if | 47 #end if |
| 43 $samfile | 48 #if $force_sort: |
| 49 --order=name | |
| 50 --format=bam | |
| 51 name_sorted_alignment.bam | |
| 52 #else | |
| 53 --order=pos | |
| 54 --format=$samfile.extension | |
| 55 $samfile | |
| 56 #end if | |
| 44 $gfffile | 57 $gfffile |
| 45 | awk '{if ($1 ~ "no_feature|ambiguous|too_low_aQual|not_aligned|alignment_not_unique") print $0 | "cat 1>&2"; else print $0}' > $counts 2>$othercounts | 58 | awk '{if ($1 ~ "no_feature|ambiguous|too_low_aQual|not_aligned|alignment_not_unique") print $0 | "cat 1>&2"; else print $0}' > $counts 2>$othercounts |
| 46 #if $samout_conditional.samout: | 59 #if $samout_conditional.samout: |
| 47 && samtools view -Su -t ${reference_fasta_filename}.fai $__new_file_path__/${samoutfile.id}_tmp | samtools sort -o - sorted > $samoutfile | 60 && samtools view -Su -t ${reference_fasta_filename}.fai $__new_file_path__/${samoutfile.id}_tmp | samtools sort -o - sorted > $samoutfile |
| 48 #end if</command> | 61 #end if |
| 62 ]]> | |
| 63 </command> | |
| 49 | 64 |
| 50 <inputs> | 65 <inputs> |
| 51 <param format="sam,bam" name="samfile" type="data" label="Aligned SAM/BAM File"/> | 66 <param format="sam,bam" name="samfile" type="data" label="Aligned SAM/BAM File"/> |
| 52 <param format="gff" name="gfffile" type="data" label="GFF File"/> | 67 <param format="gff" name="gfffile" type="data" label="GFF File"/> |
| 53 <param name="mode" type="select" label="Mode"> | 68 <param name="mode" type="select" label="Mode"> |
| 67 </param> | 82 </param> |
| 68 <param name="featuretype" type="text" value="exon" label="Feature type"> | 83 <param name="featuretype" type="text" value="exon" label="Feature type"> |
| 69 <help>Feature type (3rd column in GFF file) to be used. All features of other types are ignored. The default, suitable for RNA-Seq and Ensembl GTF files, is exon.</help> | 84 <help>Feature type (3rd column in GFF file) to be used. All features of other types are ignored. The default, suitable for RNA-Seq and Ensembl GTF files, is exon.</help> |
| 70 </param> | 85 </param> |
| 71 <param name="idattr" type="text" value="gene_id" label="ID Attribute"> | 86 <param name="idattr" type="text" value="gene_id" label="ID Attribute"> |
| 72 <help>GFF attribute to be used as feature ID. Several GFF lines with the same feature ID will be considered as parts of the same feature. The feature ID is used to identify the counts in the output table. All features of the specified type MUST have a value for this attribute. The default, suitable for RNA-SEq and Ensembl GTF files, is gene_id.</help> | 87 <help>GFF attribute to be used as feature ID. Several GFF lines with the same feature ID will be considered as parts of the same feature. The feature ID is used to identify the counts in the output table. All features of the specified type MUST have a value for this attribute. The default, suitable for RNA-Seq and Ensembl GTF files, is gene_id.</help> |
| 73 </param> | 88 </param> |
| 74 <conditional name="samout_conditional"> | 89 <conditional name="samout_conditional"> |
| 75 <param name="samout" type="boolean" value="False" truevalue="True" falsevalue="False" label="Additional BAM Output"> | 90 <param name="samout" type="boolean" value="False" truevalue="True" falsevalue="False" label="Additional BAM Output"> |
| 76 <help>Write out all SAM alignment records into an output BAM file, annotating each line with its assignment to a feature or a special counter (as an optional field with tag ‘XF’).</help> | 91 <help>Write out all SAM alignment records into an output BAM file, annotating each line with its assignment to a feature or a special counter (as an optional field with tag ‘XF’).</help> |
| 77 </param> | 92 </param> |
| 93 <param name="ref_file" type="data" format="fasta" label="Using reference file" /> | 108 <param name="ref_file" type="data" format="fasta" label="Using reference file" /> |
| 94 </when> | 109 </when> |
| 95 </conditional> | 110 </conditional> |
| 96 </when> | 111 </when> |
| 97 </conditional> | 112 </conditional> |
| 113 <param name="force_sort" type="boolean" value="False" truevalue="True" falsevalue="False" label="Force sorting of SAM/BAM file by NAME"> | |
| 114 <help>This option can be used for paired-end data that has many unmapped mates. Use this if you get the warning about paired-end data missing or not being properly sorted.</help> | |
| 115 </param> | |
| 98 </inputs> | 116 </inputs> |
| 99 | 117 |
| 100 <outputs> | 118 <outputs> |
| 101 <data format="tabular" name="counts" metadata_source="samfile" label="${tool.name} on ${on_string}"/> | 119 <data format="tabular" name="counts" metadata_source="samfile" label="${tool.name} on ${on_string}"/> |
| 102 <data format="tabular" name="othercounts" metadata_source="samfile" label="${tool.name} on ${on_string} (no feature)"/> | 120 <data format="tabular" name="othercounts" metadata_source="samfile" label="${tool.name} on ${on_string} (no feature)"/> |
| 112 <param name="samout" value="False" /> | 130 <param name="samout" value="False" /> |
| 113 <output name="counts" file="htseq-test_counts.tsv" /> | 131 <output name="counts" file="htseq-test_counts.tsv" /> |
| 114 <output name="othercounts" file="htseq-test_othercounts.tsv" /> | 132 <output name="othercounts" file="htseq-test_othercounts.tsv" /> |
| 115 </test> | 133 </test> |
| 116 <test> | 134 <test> |
| 117 <param name="samfile" value="htseq-test.bam" /> | 135 <param name="samfile" value="htseq-test.sam" /> |
| 118 <param name="gfffile" value="htseq-test.gff" /> | 136 <param name="gfffile" value="htseq-test.gff" /> |
| 119 <param name="samout" value="False" /> | 137 <param name="samout" value="False" /> |
| 138 <param name="force_sort" value="True" /> | |
| 120 <output name="counts" file="htseq-test_counts.tsv" /> | 139 <output name="counts" file="htseq-test_counts.tsv" /> |
| 121 <output name="othercounts" file="htseq-test_othercounts.tsv" /> | 140 <output name="othercounts" file="htseq-test_othercounts.tsv" /> |
| 122 </test> | 141 </test> |
| 123 <test> | 142 <test> |
| 143 <param name="samfile" value="htseq-test.bam" /> | |
| 144 <param name="gfffile" value="htseq-test.gff" /> | |
| 145 <param name="samout" value="False" /> | |
| 146 <output name="counts" file="htseq-test_counts.tsv" /> | |
| 147 <output name="othercounts" file="htseq-test_othercounts.tsv" /> | |
| 148 </test> | |
| 149 <test> | |
| 124 <param name="samfile" value="htseq-test-paired.bam" /> | 150 <param name="samfile" value="htseq-test-paired.bam" /> |
| 125 <param name="singlepaired" value="paired" /> | 151 <param name="singlepaired" value="paired" /> |
| 126 <param name="gfffile" value="htseq-test.gff" /> | 152 <param name="gfffile" value="htseq-test.gff" /> |
| 127 <param name="samout" value="False" /> | 153 <param name="samout" value="False" /> |
| 128 <output name="counts" file="htseq-test-paired_counts.tsv" /> | 154 <output name="counts" file="htseq-test-paired_counts.tsv" /> |
| 129 <output name="othercounts" file="htseq-test-paired_othercounts.tsv" /> | 155 <output name="othercounts" file="htseq-test-paired_othercounts.tsv" /> |
| 130 </test> | 156 </test> |
| 157 <test> | |
| 158 <param name="samfile" value="htseq-test-paired.bam" /> | |
| 159 <param name="singlepaired" value="paired" /> | |
| 160 <param name="gfffile" value="htseq-test.gff" /> | |
| 161 <param name="samout" value="False" /> | |
| 162 <param name="force_sort" value="True" /> | |
| 163 <output name="counts" file="htseq-test-paired_counts.tsv" /> | |
| 164 <output name="othercounts" file="htseq-test-paired_othercounts.tsv" /> | |
| 165 </test> | |
| 166 | |
| 131 <!-- Seems to be an issue setting the $reference_fasta_filename variable during test | 167 <!-- Seems to be an issue setting the $reference_fasta_filename variable during test |
| 132 <test> | 168 <test> |
| 133 <param name="samfile" value="htseq-test.sam" /> | 169 <param name="samfile" value="htseq-test.sam" /> |
| 134 <param name="gfffile" value="htseq-test.gff" /> | 170 <param name="gfffile" value="htseq-test.gff" /> |
| 135 <param name="samout" value="True" /> | 171 <param name="samout" value="True" /> |
| 141 </test> | 177 </test> |
| 142 --> | 178 --> |
| 143 </tests> | 179 </tests> |
| 144 | 180 |
| 145 <help> | 181 <help> |
| 182 <![CDATA[ | |
| 146 Overview | 183 Overview |
| 147 -------- | 184 -------- |
| 148 | 185 |
| 149 This tool takes an alignment file in SAM or BAM format and feature file in GFF format | 186 This tool takes an alignment file in SAM or BAM format and feature file in GFF format |
| 150 and calculates the number of reads mapping to each feature. It uses the *htseq-count* | 187 and calculates the number of reads mapping to each feature. It uses the *htseq-count* |
| 230 -q, --quiet suppress progress report and warnings | 267 -q, --quiet suppress progress report and warnings |
| 231 | 268 |
| 232 Written by Simon Anders (sanders@fs.tum.de), European Molecular Biology | 269 Written by Simon Anders (sanders@fs.tum.de), European Molecular Biology |
| 233 Laboratory (EMBL). (c) 2010. Released under the terms of the GNU General | 270 Laboratory (EMBL). (c) 2010. Released under the terms of the GNU General |
| 234 Public License v3. Part of the 'HTSeq' framework. | 271 Public License v3. Part of the 'HTSeq' framework. |
| 272 ]]> | |
| 235 </help> | 273 </help> |
| 274 | |
| 236 <citations> | 275 <citations> |
| 237 <citation type="bibtex"> | 276 <citation type="bibtex"> |
| 238 @article{anders_htseqpython_2015, | 277 @article{anders_htseqpython_2015, |
| 239 title = {{HTSeq}—a {Python} framework to work with high-throughput sequencing data}, | 278 title = {{HTSeq}—a {Python} framework to work with high-throughput sequencing data}, |
| 240 volume = {31}, | 279 volume = {31}, |
