Mercurial > repos > galaxyp > mzsqlite_psm_align

--- a/mzsqlite_psm_align.xml	Mon Apr 16 18:00:53 2018 -0400
+++ b/mzsqlite_psm_align.xml	Thu Apr 19 14:30:28 2018 -0400
@@ -1,5 +1,5 @@
-<tool id="mzsqlite_psm_align" name="MzSQLite ProBED ProBAM" version="0.1.0">
-    <description>from mz.sqlite aand genomic mapping</description>
+<tool id="mzsqlite_psm_align" name="MzSQLite ProBAM ProBED" version="0.1.0">
+    <description>from mz.sqlite and genomic mapping</description>
     <requirements>
         <requirement type="package">biopython</requirement>
         <requirement type="package">twobitreader</requirement>
@@ -41,10 +41,13 @@
             '$mzsqlitedb' '$genomicdb'
     ]]></command>
     <inputs>
-        <param name="mzsqlitedb" type="data" format="mz.sqlite" label="mz.sqlite databse"/>
-        <param name="genomicdb" type="data" format="sqlite" label="genomic mapping sqlite databse"/>
+        <param name="mzsqlitedb" type="data" format="mz.sqlite" label="mz.sqlite database"
+               help="generated from mzIdentML by mz_to_sqlite"/>
+        <param name="genomicdb" type="data" format="sqlite" label="genomic mapping sqlite database"
+               help="Genomic mapping for the Search proteins in the mzIdentML"/>
         <conditional name="ref">
-            <param name="ref_source" type="select" label="Source for Genomic Sequence Data">
+            <param name="ref_source" type="select" label="Source for Genomic Sequence Data"
+                   help="Used to generate the genomic reference sequence for identified peptides">
                 <option value="cached">Locally cached twobit</option>
                 <option value="history">History dataset twobit</option>
             </param>
@@ -57,11 +60,14 @@
                 <param name="ref_file" type="data" format="twobit" label="reference 2bit file" />
             </when>
         </conditional>
-        <param name="gffutilsdb" type="data" format="sqlite" label="gffutils sqlite database" optional="true"/>
-        <param name="readlignments" type="data" format="bam" label="read alignments bam" optional="true"/>
-        <param name="genomicref" type="text" value="" label="Genome Reference name" optional="true"/>
+        <param name="gffutilsdb" type="data" format="sqlite" label="gffutils sqlite database" optional="true"
+               help="Categorizes the peptide by GTF feature for the proBAM XG tag" />
+        <param name="readlignments" type="data" format="bam" label="read alignments bam" optional="true"
+               help="Allows proBAM SEQ field to be modified with observed variants"/>
+        <param name="genomicref" type="text" value="" label="Genome Reference name" optional="true"
+               help="The genome reference name to use in the proBED genomeReferenceVersion column"/>
         <param name="output_formats" type="select" display="checkboxes" label="outputs" multiple="true">
-            <option value="probam">pro.bam</option>
+            <option value="probam" selected="true">pro.bam</option>
             <option value="prosam">pro.sam</option>
             <option value="probed">pro.bed</option>
         </param>
@@ -79,13 +85,18 @@
     </outputs>
     <help><![CDATA[

-Generates proBAM or proBED feature alignment files for peptides identified from a mass spectrometry protein search analysis.
+Generates proBAM_ or proBED_ feature alignment files for peptides identified from a mass spectrometry protein search analysis.
+

-The tool mz_to_sqlite generates the a SQLite database for a mzIdentML file,
+The tool mz_to_sqlite_ generates a SQLite database for an mzIdentML file,
 along with the fasta search database and the spectrum files used in the search.
+This mz.sqlite database is used in conjunction with a genomic mapping sqlite database
+to generate the proBAM_ or proBED_ feature alignment files.

 The genomic mapping sqlite database has this schema:

+::
+
     CREATE TABLE feature_cds_map (	/* One row for each exon in the search protein */
         name TEXT, 		/* Accession name of search protein in mzIdentML */
         chrom TEXT, 		/* Reference genome chromosome for this exon */
@@ -96,8 +107,13 @@
         cds_end INTEGER		/* The CDS coding start end this exon (non-inclusive) */
     );

+
 Example:
+
+::
+
     sqlite> select * from feature_cds_map WHERE name like 'ENSMUSP00000000001%';
+    name                   chrom    start           end         strand  cds_start cds_end
     ENSMUSP00000000001      chr3    108145887       108146005       -       0       118
     ENSMUSP00000000001      chr3    108123794       108123837       -       118     161
     ENSMUSP00000000001      chr3    108123541       108123683       -       161     303
@@ -107,7 +123,23 @@
     ENSMUSP00000000001      chr3    108111934       108112088       -       720     874
     ENSMUSP00000000001      chr3    108109421       108109612       -       874     1065

+Each row represents an exon in the search protein.
+The locations: start, end, cds_start, and cds_end are **zero-based** like BED format.
+
+The **name** field must match the **accession** name used in the mz.sqlite database
+and thus the mzIdentML search results file.
+
+The protein positions are described in CDS base offsets rather than amino acid offsets
+to allow for codons being split across exons.
+
 This schema can describe structural variants as well as canonical transcripts.

+.. _proBAM: http://www.psidev.info/probam
+.. _proBED: http://www.psidev.info/probed
+.. _mz_to_sqlite: https://toolshed.g2.bx.psu.edu/view/galaxyp/mz_to_sqlite/e34bdac5b157
+
     ]]></help>
+    <citations>
+        <citation type="doi">10.1186/s13059-017-1377-x</citation>
+    </citations>
 </tool>