Mercurial > repos > galaxyp > map_peptides_to_bed

--- a/map_peptides_to_bed.py	Tue Jan 12 21:02:24 2016 -0500
+++ b/map_peptides_to_bed.py	Mon Feb 22 17:31:34 2016 -0500
@@ -232,7 +232,7 @@
   parser.add_option( '-t', '--translated_bed', dest='translated_bed', default=None, help='A bed file with added 13th column having a translation'  )
   parser.add_option( '-i', '--input', dest='input', default=None, help='Tabular file with peptide_sequence column' )
   parser.add_option( '-p', '--peptide_column', type='int', dest='peptide_column', default=1, help='column ordinal with peptide sequence' )
-  parser.add_option( '-n', '--name_column', type='int', dest='name_column', default=None, help='column ordinal with protein name' )
+  parser.add_option( '-n', '--name_column', type='int', dest='name_column', default=2, help='column ordinal with protein name' )
   parser.add_option( '-s', '--start_column', type='int', dest='start_column', default=None, help='column with peptide start position in protein' )
   parser.add_option( '-B', '--bed', dest='bed', default=None, help='Output a bed file with added 13th column having translation'  )
   ## parser.add_option( '-G', '--gff3', dest='gff', default=None, help='Output translations to a GFF3 file'  )
--- a/map_peptides_to_bed.xml	Tue Jan 12 21:02:24 2016 -0500
+++ b/map_peptides_to_bed.xml	Mon Feb 22 17:31:34 2016 -0500
@@ -1,4 +1,5 @@
 <tool id="map_peptides_to_bed" name="Map peptides to a bed file" version="0.1.0">
+    <description>for viewing in a genome browser</description>
     <requirements>
         <requirement type="package" version="1.62">biopython</requirement>
     </requirements>
@@ -22,15 +23,22 @@
           --bed="$mapped_peptides"
     ]]></command>
     <inputs>
-        <param name="translated_bed" type="data" format="bed" label="Translated bed with IDs to match in the input" help=""/>
-        <param name="input" type="data" format="tabular" label="Identified Peptides" help=""/>
-        <param name="peptide_column" type="data_column" data_ref="input" label="peptide column" optional="true"
-               help="Defaults to first column"/>
-        <param name="name_column" type="data_column" data_ref="input" label="protein name column" optional="true"
-               help="The name in this column must match the name column in the Translate bed"/>
-        <param name="start_column" type="data_column" data_ref="input" label="peptide offset column" optional="true"
-               help="The offset in AnimoAcids of the peptide from the start of the protein sequence"/>
-        <param name="gffTags" type="boolean" truevalue="--gffTags" falsevalue="" checked="true" label="Use #gffTags in output" help=""/>
+        <param name="translated_bed" type="data" format="bed" label="Translated BED"
+               help="mapping Protein IDs from a Protein Search fasta to a reference genome"/>
+        <param name="input" type="data" format="tabular" label="Identified Peptides"
+               help="Such as a  PSM (Peptide Spectral Match) report from a Proteomics Search Application"/>
+        <param name="peptide_column" type="data_column" data_ref="input" label="PSM peptide column" optional="true"
+               help="Contains the peptide amino acid sequence. Defaults to first column"/>
+        <param name="name_column" type="data_column" data_ref="input" label="PSM protein name column" optional="true"
+               help="The name in this column must match the name column in the Translate BED input. Defaults to second column." />
+        <param name="start_column" type="data_column" data_ref="input" label="PSM peptide offset column (optional)" optional="true">
+               <help>The offset in AnimoAcids of the peptide from the start of the protein sequence.
+                     If this column is not available, the application will expect the Translated BED file
+                     to have a 13th column with the protein sequence from which to determine the offset
+               </help>
+        </param>
+        <param name="gffTags" type="boolean" truevalue="--gffTags" falsevalue="" checked="true" label="Use #gffTags in output"
+               help="and use the peptide as the display name for the entry"/>
     </inputs>
     <outputs>
         <data name="mapped_peptides" format="bed" />
@@ -45,7 +53,60 @@
         </test>
     </tests>
     <help><![CDATA[
-        Usage: map_peptides_to_bed.py [options]
+**Map peptides to a bed file**
+
+This tool is intended to map peptides identified by Mass Spectrometry relative to the protein sequence in the Proteomics Search database.
+
+It generates a BED file that maps the location of peptides within proteins mapped to a reference genome so that they can be displayed in a genome browser.
+
+
+The input is a tabular file that has columns containing: peptide,  protein ID, and optionally the offset of the peptide from the start of the protein sequence.
+
+The other input is a BED file decribing the location of protein sequences relative to a reference genome.
+This file can be produced by the Translate BED Sequences tool when generating the fasta file of translations.
+
+The output is a BED file with lines from the input BED file that had peptide matches.
+The ID, thichStart, and thickEnd fields are changed to reflect the peptide match to that protein entry.
+
+
+Inputs:
+
+  - A BED file for the Proteins contained in a Proteomics Search Database.
+    (Should have a 13th column with the protein sequence.)
+
+  - A tabular file (Pepetide Spectral Matach report) that includes columns that contain the protein identifer and the peptide sequence.
+
+
+Output:
+
+  - A BED file with an entry from the input BED file for each mapped input peptide,
+    with the ID, thichStart, and thickEnd fields are changed to reflect the peptide.
+
+
+Example:
+
+  Input BED file ::
+
+    track name="novel_junction_peptides" type=bedDetail description="test"
+    5	90931315	90932985	JUNC00034987_3	1	+	90931315	90932985	255,0,0	2	118,74	0,1596	AWRRGPAASRCSGAVWAEGLCEPRASPSARGAASGAAAGGPAERRDSIFQRLPLSASPFSHLFK
+    X	157593859	157598461	JUNC00080388_3	11	-	157593859	157598461	255,0,0	2	14,151	0,4451	SPGLVRMVLCRPRPFLFPFGVSAPGREPLRAPAASACALPRGASVRPSKEIICVF
+
+
+  Input PSM (Peptide Spectral Match) file ::
+
+    25895	JUNC00034987_3	ASPSARGAASGAAAGGPAER
+    15253	JUNC00080388_3	APAASACALPR
+
+
+  Output BED file::
+
+    track name="novel_junction_peptides" type=bedDetail description="test"
+    #gffTags
+    5	90931315	90932985	ID=JUNC00034987_3;Name=ASPSARGAASGAAAGGPAER	1	+	90931387	90932925	255,0,0	2	118,74	0,1596	ASPSARGAASGAAAGGPAER	AWRRGPAASRCSGAVWAEGLCEPRASPSARGAASGAAAGGPAERRDSIFQRLPLSASPFSHLFK
+    X	157593859	157598461	ID=JUNC00080388_3;Name=APAASACALPR	11	-	157598338	157598371	255,0,0	2	14,151	0,4451	APAASACALPR	SPGLVRMVLCRPRPFLFPFGVSAPGREPLRAPAASACALPRGASVRPSKEIICVF
+
+
+Usage: map_peptides_to_bed.py [options]

 Options:
   -h, --help            show this help message and exit