Mercurial > repos > galaxyp > mzsqlite_psm_align
view mzsqlite_psm_align.xml @ 1:4f8cf8fbef57 draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit b0c57cac4e558d974a16b14d4498cf8d4ba9e0c7
author | galaxyp |
---|---|
date | Thu, 19 Apr 2018 14:30:28 -0400 |
parents | f2dc9805107a |
children |
line wrap: on
line source
<tool id="mzsqlite_psm_align" name="MzSQLite ProBAM ProBED" version="0.1.0">
    <!--
        Galaxy wrapper around mzsqlite_psm_align.py.
        Inputs: an mz.sqlite database, a genomic mapping sqlite database and a
        twobit genome (cached or from the history); optionally a gffutils
        sqlite database and a BAM of read alignments.
        Outputs: any combination of pro.sam, pro.bam and pro.bed, chosen with
        the "output_formats" checkboxes.
    -->
    <description>from mz.sqlite and genomic mapping</description>
    <!-- Packages resolved into the job environment (presumably the imports of mzsqlite_psm_align.py). -->
    <requirements>
        <requirement type="package">biopython</requirement>
        <requirement type="package">twobitreader</requirement>
        <requirement type="package">pysam</requirement>
        <requirement type="package">gffutils</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Link the optional BAM and its index under matching names so that the
        ## index sits next to the BAM (presumably where the script looks for it).
        #if $readlignments:
            ln -s -f '$readlignments' 'input.bam' &&
            ln -s -f '$readlignments.metadata.bam_index' 'input.bam.bai' &&
        #end if
        python '$__tool_directory__/mzsqlite_psm_align.py'
        ## Genome sequence: either a cached twobit from the data table or a history dataset.
        #if $ref.ref_source == 'cached':
            --twobit='$ref.ref_loc.fields.path'
        #elif $ref.ref_source == 'history':
            --twobit='$ref.ref_file'
        #end if
        #if $gffutilsdb:
            --gffutils_sqlite '$gffutilsdb'
        #end if
        #if $readlignments:
            --reads_bam 'input.bam'
        #end if
        ## Only pass an output option for the formats ticked in output_formats.
        #if 'probed' in str($output_formats).split(','):
            --probed '$probed'
        #end if
        #if 'prosam' in str($output_formats).split(','):
            --prosam '$prosam'
        #end if
        #if 'probam' in str($output_formats).split(','):
            --probam '$probam'
        #end if
        ## Both values are quoted: an unset dbkey is a question mark, which an
        ## unquoted shell word would treat as a glob, and free text must not be word-split.
        #if $genomicref:
            --genomeReference '$genomicref'
        #else
            --genomeReference '$genomicdb.metadata.dbkey'
        #end if
        -v
        '$mzsqlitedb' '$genomicdb'
    ]]></command>
    <inputs>
        <param name="mzsqlitedb" type="data" format="mz.sqlite" label="mz.sqlite database"
               help="generated from mzIdentML by mz_to_sqlite"/>
        <param name="genomicdb" type="data" format="sqlite" label="genomic mapping sqlite database"
               help="Genomic mapping for the Search proteins in the mzIdentML"/>
        <conditional name="ref">
            <param name="ref_source" type="select" label="Source for Genomic Sequence Data"
                   help="Used to generate the genomic reference sequence for identified peptides">
                <option value="cached">Locally cached twobit</option>
                <option value="history">History dataset twobit</option>
            </param>
            <when value="cached">
                <param name="ref_loc" type="select" label="Select reference 2bit file">
                    <options from_data_table="twobit" />
                </param>
            </when>
            <when value="history">
                <param name="ref_file" type="data" format="twobit" label="reference 2bit file" />
            </when>
        </conditional>
        <param name="gffutilsdb" type="data" format="sqlite" label="gffutils sqlite database" optional="true"
               help="Categorizes the peptide by GTF feature for the proBAM XG tag" />
        <param name="readlignments" type="data" format="bam" label="read alignments bam" optional="true"
               help="Allows proBAM SEQ field to be modified with observed variants"/>
        <param name="genomicref" type="text" value="" label="Genome Reference name" optional="true"
               help="The genome reference name to use in the proBED genomeReferenceVersion column"/>
        <param name="output_formats" type="select" display="checkboxes" label="outputs" multiple="true">
            <option value="probam" selected="true">pro.bam</option>
            <option value="prosam">pro.sam</option>
            <option value="probed">pro.bed</option>
        </param>
    </inputs>
    <!--
        Each output exists only when its format is ticked. The leading
        "output_formats and" guard covers the case where nothing is ticked
        (the value is then None and a bare "in" test would raise instead of
        filtering the output out).
    -->
    <outputs>
        <data name="prosam" format="sam" label="pro.SAM on ${on_string}">
            <filter>output_formats and 'prosam' in output_formats</filter>
        </data>
        <data name="probam" format="bam" label="pro.Bam on ${on_string}">
            <filter>output_formats and 'probam' in output_formats</filter>
        </data>
        <data name="probed" format="bed" label="pro.Bed on ${on_string}">
            <filter>output_formats and 'probed' in output_formats</filter>
        </data>
    </outputs>
    <help><![CDATA[

Generates proBAM_ or proBED_ feature alignment files for peptides identified from a mass spectrometry protein search analysis.

The tool mz_to_sqlite_ generates a SQLite database for a mzIdentML file,
along with the fasta search database and the spectrum files used in the search.

This mz.sqlite database is used in conjunction with a genomic mapping sqlite database
to generate the proBAM_ or proBED_ feature alignment files.

The genomic mapping sqlite database has this schema:

::

    CREATE TABLE feature_cds_map ( /* One row for each exon in the search protein */
        name TEXT,         /* Accession name of search protein in mzIdentML */
        chrom TEXT,        /* Reference genome chromosome for this exon */
        start INTEGER,     /* genomic start of the exon (zero-based like BED) */
        end INTEGER,       /* genomic end of the exon (non-inclusive like BED) */
        strand TEXT,       /* genomic strand: '+' or '-' */
        cds_start INTEGER, /* The CDS coding start for this exon (zero-based) */
        cds_end INTEGER    /* The CDS coding end for this exon (non-inclusive) */
    );

Example:

::

    sqlite> select * from feature_cds_map WHERE name like 'ENSMUSP00000000001%';
    name                chrom  start      end        strand  cds_start  cds_end
    ENSMUSP00000000001  chr3   108145887  108146005  -       0          118
    ENSMUSP00000000001  chr3   108123794  108123837  -       118        161
    ENSMUSP00000000001  chr3   108123541  108123683  -       161        303
    ENSMUSP00000000001  chr3   108118300  108118458  -       303        461
    ENSMUSP00000000001  chr3   108115762  108115891  -       461        590
    ENSMUSP00000000001  chr3   108112472  108112602  -       590        720
    ENSMUSP00000000001  chr3   108111934  108112088  -       720        874
    ENSMUSP00000000001  chr3   108109421  108109612  -       874        1065

Each row represents an exon in the search protein.
The locations: start, end, cds_start, and cds_end are **zero-based** like BED format.
The **name** field must match the **accession** name used in the mz.sqlite database
and thus the mzIdentML search results file.

The protein positions are described in CDS base offsets rather than amino acid offsets
to allow for codons being split across exons.
This schema can describe structural variants as well as canonical transcripts.

.. _proBAM: http://www.psidev.info/probam
.. _proBED: http://www.psidev.info/probed
.. _mz_to_sqlite: https://toolshed.g2.bx.psu.edu/view/galaxyp/mz_to_sqlite/e34bdac5b157

    ]]></help>
    <citations>
        <citation type="doi">10.1186/s13059-017-1377-x</citation>
    </citations>
</tool>