diff mzsqlite_psm_align.xml @ 0:492f98d89e26 draft

planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/mzsqlite_psm_align commit 88e2fb9c31fbd687a0956924a870137d1fb9bee3-dirty
author jjohnson
date Tue, 10 Apr 2018 09:57:49 -0400
parents
children 46113c737b68
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mzsqlite_psm_align.xml	Tue Apr 10 09:57:49 2018 -0400
@@ -0,0 +1,108 @@
+<tool id="mzsqlite_psm_align" name="MzSQLite ProBED ProBAM" version="0.1.0">
+    <description>from mz.sqlite and genomic mapping</description>
+    <!-- Package dependencies declared for this tool; none of them has a version pinned.
+         Presumably these are the modules imported by mzsqlite_psm_align.py (TODO confirm). -->
+    <requirements>
+        <requirement type="package">biopython</requirement>
+        <requirement type="package">twobitreader</requirement>
+        <requirement type="package">pysam</requirement>
+        <requirement type="package">gffutils</requirement>
+    </requirements>
+    <!-- Runs mzsqlite_psm_align.py on the mz.sqlite database and the genomic mapping database,
+         adding options only for the optional inputs and output formats the user supplied. -->
+    <command detect_errors="exit_code"><![CDATA[
+        python '$__tool_directory__/mzsqlite_psm_align.py'
+            ## Genomic sequence comes either from the twobit data table or from a history dataset.
+            #if $ref.ref_source == 'cached':
+                --twobit='$ref.ref_loc.fields.path'
+            #elif $ref.ref_source == 'history':
+                --twobit='$ref.ref_file'
+            #end if
+            #if $gffutilsdb:
+                --gffutils_file '$gffutilsdb'
+            #end if
+            #if $readlignments:
+                --reads_bam '$readlignments'
+            #end if
+            ## Compare against the comma-separated selection as a list, so that an empty
+            ## selection simply matches nothing instead of depending on wrapper behaviour.
+            #set $selected_formats = str($output_formats).split(',')
+            #if 'probed' in $selected_formats:
+               --probed '$probed'
+            #end if
+            #if 'prosam' in $selected_formats:
+               --prosam '$prosam'
+            #end if
+            #if 'probam' in $selected_formats:
+               --probam '$probam'
+            #end if
+            ## Values are quoted so that an unspecified dbkey ('?') is not expanded as a
+            ## shell glob and a user-supplied name cannot be split into several arguments.
+            #if $genomicref:
+               --genomeReference '$genomicref'
+            #else
+               --genomeReference '$genomicdb.metadata.dbkey'
+            #end if
+            '$mzsqlitedb' '$genomicdb'
+    ]]></command>
+    <!-- Tool parameters. mzsqlitedb and genomicdb are passed to the script as its two positional
+         arguments, in that order; everything else maps to an optional command-line flag. -->
+    <inputs>
+        <param name="mzsqlitedb" type="data" format="mz.sqlite" label="mz.sqlite database"/>
+        <!-- NOTE(review): format="mz.sqlite" looks copied from mzsqlitedb; confirm whether the
+             genomic mapping database should accept plain "sqlite" datasets instead. -->
+        <param name="genomicdb" type="data" format="mz.sqlite" label="genomic mapping sqlite database"/>
+        <conditional name="ref">
+            <param name="ref_source" type="select" label="Source for Genomic Sequence Data">
+                <option value="cached">Locally cached twobit</option>
+                <option value="history">History dataset twobit</option>
+            </param>
+            <when value="cached">
+                <param name="ref_loc" type="select" label="Select reference 2bit file">
+                    <options from_data_table="twobit" />
+                </param>
+            </when>
+            <when value="history">
+                <param name="ref_file" type="data" format="twobit" label="reference 2bit file" />
+            </when>
+        </conditional>
+        <param name="gffutilsdb" type="data" format="sqlite" label="gffutils sqlite database" optional="true"/>
+        <param name="readlignments" type="data" format="bam" label="read alignments bam" optional="true"/>
+        <!-- When left empty, the command falls back to the dbkey of the genomicdb dataset. -->
+        <param name="genomicref" type="text" value="" label="Genome Reference name" optional="true"/>
+        <param name="output_formats" type="select" display="checkboxes" label="outputs" multiple="true">
+            <option value="probam">pro.bam</option>
+            <option value="prosam">pro.sam</option>
+            <option value="probed">pro.bed</option>
+        </param>
+    </inputs>
+    <!-- One dataset per selected output format. The "output_formats and" guard covers an empty
+         selection, where the value is None and a bare "in" test would raise instead of
+         filtering the dataset out. -->
+    <outputs>
+        <data name="prosam" format="pro.sam">
+            <filter>output_formats and 'prosam' in output_formats</filter>
+        </data>
+        <data name="probam" format="pro.bam">
+            <filter>output_formats and 'probam' in output_formats</filter>
+        </data>
+        <data name="probed" format="pro.bed">
+            <filter>output_formats and 'probed' in output_formats</filter>
+        </data>
+    </outputs>
+    <help><![CDATA[
+
+Generates proBAM or proBED feature alignment files for peptides identified from a mass spectrometry protein search analysis.
+
+The tool mz_to_sqlite generates a SQLite database for an mzIdentML file,
+along with the fasta search database and the spectrum files used in the search.
+
+The genomic mapping sqlite database has this schema::
+
+    CREATE TABLE feature_cds_map (	/* One row for each exon in the search protein */
+        name TEXT, 		/* Accession name of search protein in mzIdentML */
+        chrom TEXT, 		/* Reference genome chromosome for this exon */
+        start INTEGER, 		/* genomic start of the exon (zero-based like BED) */
+        end INTEGER, 		/* genomic end of the exon (non-inclusive like BED) */
+        strand TEXT, 		/* genomic strand: '+' or '-' */
+        cds_start INTEGER, 	/* The CDS coding start for this exon (zero-based) */
+        cds_end INTEGER		/* The CDS coding end for this exon (non-inclusive) */
+    );
+
+Example::
+
+    sqlite> select * from feature_cds_map WHERE name like 'ENSMUSP00000000001%';
+    ENSMUSP00000000001      chr3    108145887       108146005       -       0       118
+    ENSMUSP00000000001      chr3    108123794       108123837       -       118     161
+    ENSMUSP00000000001      chr3    108123541       108123683       -       161     303
+    ENSMUSP00000000001      chr3    108118300       108118458       -       303     461
+    ENSMUSP00000000001      chr3    108115762       108115891       -       461     590
+    ENSMUSP00000000001      chr3    108112472       108112602       -       590     720
+    ENSMUSP00000000001      chr3    108111934       108112088       -       720     874
+    ENSMUSP00000000001      chr3    108109421       108109612       -       874     1065
+
+This schema can describe structural variants as well as canonical transcripts.
+
+    ]]></help>
+</tool>