Mercurial > repos > lparsons > htseq_count

--- a/htseq-count.xml	Tue Sep 20 17:56:25 2016 -0400
+++ b/htseq-count.xml	Fri Sep 23 07:48:44 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="htseq_count" name="htseq-count" version="0.6.1galaxy2">
+<tool id="htseq_count" name="htseq-count" version="0.6.1galaxy2" profile="16.04">
     <description> - Count aligned reads in a BAM file that overlap features in a GFF file</description>
     <requirements>
         <requirement type="package" version="0.6.1.post1">htseq</requirement>
@@ -28,6 +28,7 @@
             #set $reference_fasta_filename = str( $samout_conditional.reference_source.ref_file.fields.path )
         #end if
     #end if
+
     #if $force_sort == "True":
         #if $samfile.extension == 'bam':
             samtools sort -n "$samfile" "name_sorted_alignment" &&
@@ -35,14 +36,15 @@
             samtools view -Su -t "${reference_fasta_filename}.fai" "$samfile" | samtools sort -n - "name_sorted_alignment" &&
         #end if
     #end if
+
     htseq-count
-    --mode=$mode
-    --stranded=$stranded
-    --minaqual=$minaqual
-    --type="$featuretype"
-    --idattr="$idattr"
+        --mode=$mode
+        --stranded=$stranded
+        --minaqual=$minaqual
+        --type="$featuretype"
+        --idattr="$idattr"
     #if $samout_conditional.samout == "Yes":
-        --samout=$__new_file_path__/${samoutfile.id}_tmp
+        --samout='$__new_file_path__/${samoutfile.id}_tmp'
     #end if
     #if $force_sort == "True":
         --order=name
@@ -51,12 +53,18 @@
     #else
         --order=pos
         --format=$samfile.extension
-        $samfile
+        '$samfile'
     #end if
-    "$gfffile"
-    | awk '{if ($1 ~ "no_feature|ambiguous|too_low_aQual|not_aligned|alignment_not_unique") print $0 | "cat 1>&2"; else print $0}' > $counts 2>$othercounts
+
+    '$gfffile' | awk '{if ($1 ~ "no_feature|ambiguous|too_low_aQual|not_aligned|alignment_not_unique") print $0 | "cat 1>&2"; else print $0}'
+        > '$counts'
+        2> '$othercounts'
+
     #if $samout_conditional.samout == "Yes":
-        && samtools view -Su -t "${reference_fasta_filename}.fai" "$__new_file_path__/${samoutfile.id}_tmp" | samtools sort -o - sorted > "$samoutfile"
+        && samtools view -Su
+            -t '${reference_fasta_filename}.fai'
+            '$__new_file_path__/${samoutfile.id}_tmp'
+        | samtools sort -o - name_sorted_alignment > '$samoutfile'
     #end if
     ]]>
     </command>
@@ -105,7 +113,7 @@
                             <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
                         </param>
                     </when>
-                    <when value="history"> <!-- FIX ME!!!! -->
+                    <when value="history">
                         <param name="ref_file" type="data" format="fasta" label="Using reference file" />
                     </when>
                 </conditional>
@@ -120,7 +128,11 @@
     </inputs>

     <outputs>
-        <data format="tabular" name="counts" metadata_source="samfile" label="${tool.name} on ${on_string}"/>
+        <data format="tabular" name="counts" metadata_source="samfile" label="${tool.name} on ${on_string}">
+            <actions>
+                <action name="column_names" type="metadata" default="Geneid,${samfile.name}" />
+            </actions>
+        </data>
         <data format="tabular" name="othercounts" metadata_source="samfile" label="${tool.name} on ${on_string} (no feature)"/>
         <data format="bam" name="samoutfile" metadata_source="samfile" label="${tool.name} on ${on_string} (BAM)">
             <filter>samout_conditional['samout'] == "Yes"</filter>
@@ -167,19 +179,17 @@
             <output name="counts" file="htseq-test-paired_counts.tsv" />
             <output name="othercounts" file="htseq-test-paired_othercounts.tsv" />
         </test>
-
-        <!-- Seems to be an issue setting the $reference_fasta_filename variable during test
+
         <test>
             <param name="samfile" value="htseq-test.sam" />
             <param name="gfffile" value="htseq-test.gff" />
-            <param name="samout" value="True" />
+            <param name="samout" value="Yes" />
             <param name="reference_source_selector" value="history" />
             <param name="ref_file" value="htseq-test_reference.fasta" />
             <output name="counts" file="htseq-test_counts.tsv" />
             <output name="othercounts" file="htseq-test_othercounts.tsv" />
             <output name="samoutfile" file="htseq-test_samout.bam" />
         </test>
-        -->
     </tests>

     <help>
@@ -277,28 +287,6 @@
     </help>

     <citations>
-        <citation type="bibtex">
-@article{anders_htseqpython_2015,
-    title = {{HTSeq}—a {Python} framework to work with high-throughput sequencing data},
-    volume = {31},
-    issn = {1367-4803, 1460-2059},
-    url = {http://bioinformatics.oxfordjournals.org/content/31/2/166},
-    doi = {10.1093/bioinformatics/btu638},
-    abstract = {Motivation: A large choice of tools exists for many standard tasks in the analysis of high-throughput sequencing (HTS) data. However, once a project deviates from standard workflows, custom scripts are needed.
-Results: We present HTSeq, a Python library to facilitate the rapid development of such scripts. HTSeq offers parsers for many common data formats in HTS projects, as well as classes to represent data, such as genomic coordinates, sequences, sequencing reads, alignments, gene model information and variant calls, and provides data structures that allow for querying via genomic coordinates. We also present htseq-count, a tool developed with HTSeq that preprocesses RNA-Seq data for differential expression analysis by counting the overlap of reads with genes.
-Availability and implementation: HTSeq is released as an open-source software under the GNU General Public Licence and available from http://www-huber.embl.de/HTSeq or from the Python Package Index at https://pypi.python.org/pypi/HTSeq.
-Contact: sanders\{at\}fs.tum.de},
-    language = {en},
-    number = {2},
-    urldate = {2015-04-21},
-    journal = {Bioinformatics},
-    author = {Anders, Simon and Pyl, Paul Theodor and Huber, Wolfgang},
-    month = jan,
-    year = {2015},
-    pmid = {25260700},
-    pages = {166--169},
-    }
-}
-        </citation>
+        <citation type="doi">10.1093/bioinformatics/btu638</citation>
     </citations>
 </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/htseq-test_reference.fasta	Fri Sep 23 07:48:44 2016 -0400
@@ -0,0 +1,36 @@
+>2-micron
+actg
+>MT
+actg
+>I
+actg
+>VI
+actg
+>III
+actg
+>IX
+actg
+>VIII
+actg
+>V
+actg
+>XI
+actg
+>X
+actg
+>XIV
+actg
+>II
+actg
+>XIII
+actg
+>XVI
+actg
+>XII
+actg
+>VII
+actg
+>XV
+actg
+>IV
+actg
Binary file test-data/htseq-test_samout.bam has changed