Mercurial > repos > galaxyp > mzsqlite_psm_align
changeset 1:4f8cf8fbef57 draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit b0c57cac4e558d974a16b14d4498cf8d4ba9e0c7
author | galaxyp |
---|---|
date | Thu, 19 Apr 2018 14:30:28 -0400 |
parents | f2dc9805107a |
children | |
files | mzsqlite_psm_align.xml |
diffstat | 1 files changed, 43 insertions(+), 11 deletions(-) [+] |
line wrap: on
line diff
--- a/mzsqlite_psm_align.xml Mon Apr 16 18:00:53 2018 -0400 +++ b/mzsqlite_psm_align.xml Thu Apr 19 14:30:28 2018 -0400 @@ -1,5 +1,5 @@ -<tool id="mzsqlite_psm_align" name="MzSQLite ProBED ProBAM" version="0.1.0"> - <description>from mz.sqlite aand genomic mapping</description> +<tool id="mzsqlite_psm_align" name="MzSQLite ProBAM ProBED" version="0.1.0"> + <description>from mz.sqlite and genomic mapping</description> <requirements> <requirement type="package">biopython</requirement> <requirement type="package">twobitreader</requirement> @@ -41,10 +41,13 @@ '$mzsqlitedb' '$genomicdb' ]]></command> <inputs> - <param name="mzsqlitedb" type="data" format="mz.sqlite" label="mz.sqlite databse"/> - <param name="genomicdb" type="data" format="sqlite" label="genomic mapping sqlite databse"/> + <param name="mzsqlitedb" type="data" format="mz.sqlite" label="mz.sqlite database" + help="generated from mzIdentML by mz_to_sqlite"/> + <param name="genomicdb" type="data" format="sqlite" label="genomic mapping sqlite database" + help="Genomic mapping for the Search proteins in the mzIdentML"/> <conditional name="ref"> - <param name="ref_source" type="select" label="Source for Genomic Sequence Data"> + <param name="ref_source" type="select" label="Source for Genomic Sequence Data" + help="Used to generate the genomic reference sequence for identified peptides"> <option value="cached">Locally cached twobit</option> <option value="history">History dataset twobit</option> </param> @@ -57,11 +60,14 @@ <param name="ref_file" type="data" format="twobit" label="reference 2bit file" /> </when> </conditional> - <param name="gffutilsdb" type="data" format="sqlite" label="gffutils sqlite database" optional="true"/> - <param name="readlignments" type="data" format="bam" label="read alignments bam" optional="true"/> - <param name="genomicref" type="text" value="" label="Genome Reference name" optional="true"/> + <param name="gffutilsdb" type="data" format="sqlite" label="gffutils sqlite database" 
optional="true" + help="Categorizes the peptide by GTF feature for the proBAM XG tag" /> + <param name="readlignments" type="data" format="bam" label="read alignments bam" optional="true" + help="Allows proBAM SEQ field to be modified with observed variants"/> + <param name="genomicref" type="text" value="" label="Genome Reference name" optional="true" + help="The genome reference name to use in the proBED genomeReferenceVersion column"/> <param name="output_formats" type="select" display="checkboxes" label="outputs" multiple="true"> - <option value="probam">pro.bam</option> + <option value="probam" selected="true">pro.bam</option> <option value="prosam">pro.sam</option> <option value="probed">pro.bed</option> </param> @@ -79,13 +85,18 @@ </outputs> <help><![CDATA[ -Generates proBAM or proBED feature alignment files for peptides identified from a mass spectrometry protein search analysis. +Generates proBAM_ or proBED_ feature alignment files for peptides identified from a mass spectrometry protein search analysis. + -The tool mz_to_sqlite generates the a SQLite database for a mzIdentML file, +The tool mz_to_sqlite_ generates a SQLite database for an mzIdentML file, along with the fasta search database and the spectrum files used in the search. +This mz.sqlite database is used in conjunction with a genomic mapping sqlite database +to generate the proBAM_ or proBED_ feature alignment files. 
The genomic mapping sqlite database has this schema: +:: + CREATE TABLE feature_cds_map ( /* One row for each exon in the search protein */ name TEXT, /* Accession name of search protein in mzIdentML */ chrom TEXT, /* Reference genome chromosome for this exon */ @@ -96,8 +107,13 @@ cds_end INTEGER /* The CDS coding end for this exon (non-inclusive) */ ); + Example: + +:: + sqlite> select * from feature_cds_map WHERE name like 'ENSMUSP00000000001%'; + name chrom start end strand cds_start cds_end ENSMUSP00000000001 chr3 108145887 108146005 - 0 118 ENSMUSP00000000001 chr3 108123794 108123837 - 118 161 ENSMUSP00000000001 chr3 108123541 108123683 - 161 303 @@ -107,7 +123,23 @@ ENSMUSP00000000001 chr3 108111934 108112088 - 720 874 ENSMUSP00000000001 chr3 108109421 108109612 - 874 1065 +Each row represents an exon in the search protein. +The locations: start, end, cds_start, and cds_end are **zero-based** like BED format. + +The **name** field must match the **accession** name used in the mz.sqlite database +and thus the mzIdentML search results file. + +The protein positions are described in CDS base offsets rather than amino acid offsets +to allow for codons being split across exons. + This schema can describe structural variants as well as canonical transcripts. +.. _proBAM: http://www.psidev.info/probam +.. _proBED: http://www.psidev.info/probed +.. _mz_to_sqlite: https://toolshed.g2.bx.psu.edu/view/galaxyp/mz_to_sqlite/e34bdac5b157 + ]]></help> + <citations> + <citation type="doi">10.1186/s13059-017-1377-x</citation> + </citations> </tool>