changeset 19:8bc865a85024 draft

planemo upload for repository https://github.com/lparsons/galaxy_tools/tree/master/tools/htseq_count commit e53f827acbedaec2da3a44488fb6628c9e922055
author lparsons
date Mon, 22 Jun 2015 14:14:17 -0400
parents 737cda668bdd
children 6b61ba6dd9e2
files fabfile.py htseq-count.xml
diffstat 2 files changed, 50 insertions(+), 37 deletions(-) [+]
line wrap: on
line diff
--- a/fabfile.py	Mon Jun 22 12:42:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,26 +0,0 @@
-from fabric.api import local
-from fabric.operations import prompt
-import os
-
-
-def package():
-    '''
-    Pacakge for upload to toolshed
-      packaging 'test' version (default) uses local directory
-      otherwise, specify a mercurial tag to package
-    '''
-    package_dir = 'package'
-    base_filename = os.path.join(package_dir, 'htseq-count')
-    version = prompt("Enter version number for package [test]:")
-    revision_option = ''
-    if version != '':
-        revision_option = '-r "%s"' % version
-    else:
-        version = 'test'
-    version_filename = '%s_%s.tar.gz' % (base_filename, version)
-    local('mkdir -p %s' % package_dir)
-    local('rm -f %s' % version_filename)
-    if version == 'test':
-        local('tar czvf %s --exclude "fabfile.*" --exclude "%s" --exclude ".hg*" --exclude ".DS_Store" --exclude "*.pyc" --exclude "*.swp" *' % (version_filename, package_dir))
-    else:
-        local('hg archive -t tgz %s -X "fabfile.*" -X "package" -X ".hg*" -p . "%s"' % (revision_option, version_filename))
--- a/htseq-count.xml	Mon Jun 22 12:42:30 2015 -0400
+++ b/htseq-count.xml	Mon Jun 22 14:14:17 2015 -0400
@@ -13,25 +13,30 @@
         <regex match="Error: Feature (.+) does not contain a '(.+)' attribute" source="both" level="fatal" description="Error parsing the GFF file, at least one feature of the specified 'Feature type' does not have a value for the specified 'ID Attribute'" />
         <regex match="Error occured in line (\d+) of file" source="stderr" level="fatal" description="Unknown error parsing the GFF file" />
         <regex match="Error" source="stderr" level="fatal" description="Unknown error occured" />
-        <regex match="Warning: Read (.+) claims to have an aligned mate which could not be found. \(Is the SAM file properly sorted\?\)" source="stderr" level="warning" description="PAIRED DATA MISSING OR NOT PROPERLY SORTED. Try reruning and selecting the paired-end option. See stderr output of this dataset for more information." />
+        <regex match="Warning: Read (.+) claims to have an aligned mate which could not be found. \(Is the SAM file properly sorted\?\)" source="stderr" level="warning" description="PAIRED DATA MISSING OR NOT PROPERLY SORTED. Try rerunning and selecting the option to 'Force sorting of SAM/BAM file by NAME'. See stderr output of this dataset for more information." />
     </stdio>
 
     <version_command>htseq-count -h | grep version | sed 's/^\(.*\)*\(version .*\)\./\2/'</version_command>
 
-    <command>
+    <command><![CDATA[
     ##set up input files
     #set $reference_fasta_filename = "localref.fa"
     #if $samout_conditional.samout:
         #if str( $samout_conditional.reference_source.reference_source_selector ) == "history":
-            ln -s "${samout_conditional.reference_source.ref_file}" "${reference_fasta_filename}" &amp;&amp;
-            samtools faidx "${reference_fasta_filename}" 2&gt;&amp;1 || echo "Error running samtools faidx for htseq-count" &gt;&amp;2 &amp;&amp;
+            ln -s "${samout_conditional.reference_source.ref_file}" "${reference_fasta_filename}" &&
+            samtools faidx "${reference_fasta_filename}" 2>&1 || echo "Error running samtools faidx for htseq-count" >&2 &&
         #else:
             #set $reference_fasta_filename = str( $samout_conditional.reference_source.ref_file.fields.path )
         #end if
     #end if
+    #if $force_sort:
+        #if $samfile.extension == 'bam':
+            samtools sort -n $samfile "name_sorted_alignment" &&
+        #else
+            samtools view -Su -t ${reference_fasta_filename}.fai $samfile | samtools sort -n - "name_sorted_alignment" &&
+        #end if
+    #end if
     htseq-count
-    --format=$samfile.extension
-    --order=pos
     --mode=$mode
     --stranded=$stranded
     --minaqual=$minaqual
@@ -40,12 +45,22 @@
     #if $samout_conditional.samout:
         --samout=$__new_file_path__/${samoutfile.id}_tmp
     #end if
-    $samfile
+    #if $force_sort:
+        --order=name
+        --format=bam
+        name_sorted_alignment.bam
+    #else
+        --order=pos
+        --format=$samfile.extension
+        $samfile
+    #end if
     $gfffile
-    | awk '{if ($1 ~ "no_feature|ambiguous|too_low_aQual|not_aligned|alignment_not_unique") print $0 | "cat 1>&amp;2"; else print $0}' &gt; $counts 2&gt;$othercounts
+    | awk '{if ($1 ~ "no_feature|ambiguous|too_low_aQual|not_aligned|alignment_not_unique") print $0 | "cat 1>&2"; else print $0}' > $counts 2>$othercounts
     #if $samout_conditional.samout:
-        &amp;&amp; samtools view -Su -t ${reference_fasta_filename}.fai $__new_file_path__/${samoutfile.id}_tmp | samtools sort -o - sorted > $samoutfile
-    #end if</command>
+        && samtools view -Su -t ${reference_fasta_filename}.fai $__new_file_path__/${samoutfile.id}_tmp | samtools sort -o - sorted > $samoutfile
+    #end if
+    ]]>
+    </command>
 
     <inputs>
         <param format="sam,bam" name="samfile" type="data" label="Aligned SAM/BAM File"/>
@@ -69,7 +84,7 @@
             <help>Feature type (3rd column in GFF file) to be used. All features of other types are ignored. The default, suitable for RNA-Seq and Ensembl GTF files, is exon.</help>
         </param>
         <param name="idattr" type="text" value="gene_id" label="ID Attribute">
-            <help>GFF attribute to be used as feature ID. Several GFF lines with the same feature ID will be considered as parts of the same feature. The feature ID is used to identity the counts in the output table. All features of the specified type MUST have a value for this attribute. The default, suitable for RNA-SEq and Ensembl GTF files, is gene_id.</help>
+            <help>GFF attribute to be used as feature ID. Several GFF lines with the same feature ID will be considered as parts of the same feature. The feature ID is used to identify the counts in the output table. All features of the specified type MUST have a value for this attribute. The default, suitable for RNA-Seq and Ensembl GTF files, is gene_id.</help>
         </param>
         <conditional name="samout_conditional">
             <param name="samout" type="boolean" value="False" truevalue="True" falsevalue="False" label="Additional BAM Output">
@@ -95,6 +110,9 @@
                 </conditional>
             </when>
         </conditional>
+        <param name="force_sort" type="boolean" value="False" truevalue="True" falsevalue="False" label="Force sorting of SAM/BAM file by NAME">
+            <help>This option can be used for paired-end data that has many unmapped mates. Use this if you get the warning about paired end data missing or not being properly sorted.</help>
+        </param>
     </inputs>
 
     <outputs>
@@ -114,6 +132,14 @@
             <output name="othercounts" file="htseq-test_othercounts.tsv" />
         </test>
         <test>
+            <param name="samfile" value="htseq-test.sam" />
+            <param name="gfffile" value="htseq-test.gff" />
+            <param name="samout" value="False" />
+            <param name="force_sort" value="True" />
+            <output name="counts" file="htseq-test_counts.tsv" />
+            <output name="othercounts" file="htseq-test_othercounts.tsv" />
+        </test>
+        <test>
             <param name="samfile" value="htseq-test.bam" />
             <param name="gfffile" value="htseq-test.gff" />
             <param name="samout" value="False" />
@@ -128,6 +154,16 @@
             <output name="counts" file="htseq-test-paired_counts.tsv" />
             <output name="othercounts" file="htseq-test-paired_othercounts.tsv" />
         </test>
+        <test>
+            <param name="samfile" value="htseq-test-paired.bam" />
+            <param name="singlepaired" value="paired" />
+            <param name="gfffile" value="htseq-test.gff" />
+            <param name="samout" value="False" />
+            <param name="force_sort" value="True" />
+            <output name="counts" file="htseq-test-paired_counts.tsv" />
+            <output name="othercounts" file="htseq-test-paired_othercounts.tsv" />
+        </test>
+
         <!-- Seems to be an issue setting the $reference_fasta_filename variable during test
         <test>
             <param name="samfile" value="htseq-test.sam" />
@@ -143,6 +179,7 @@
     </tests>
 
     <help>
+<![CDATA[
 Overview
 --------
 
@@ -232,7 +269,9 @@
 Written by Simon Anders (sanders@fs.tum.de), European Molecular Biology
 Laboratory (EMBL). (c) 2010. Released under the terms of the GNU General
 Public License v3. Part of the 'HTSeq' framework.
+]]>
     </help>
+
     <citations>
         <citation type="bibtex">
 @article{anders_htseqpython_2015,