changeset 7:9ee2e20c6f1a draft

Uploaded
author simonalpha
date Mon, 05 Jan 2015 17:17:32 -0500
parents 79ed4588f7ae
children dfb2b78b3aff
files README README.md msgfplus_search.xml repository_dependencies.xml test-data/bsa.fasta test-data/bsa.mzML test-data/bsa.mzid tool-data/msgfplus_mods.loc.sample tool-data/pepxml_databases.loc.sample tool_dependencies.xml
diffstat 10 files changed, 0 insertions(+), 527 deletions(-) [+]
line wrap: on
line diff
--- a/README	Tue Mar 18 19:46:10 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-This package is a galaxy wrapper for the MSGF+ search tool.
-
-Requirements:
-This package depends on the galaxy_protk, protk_msgfplus, and protk_proteowizard packages.
-Please see the instructions for those packages before installing.
-
-In addition to the basic requirements, you must also have unzip and a Java 6 runtime (or greater) installed.
--- a/README.md	Tue Mar 18 19:46:10 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-## What is it?
-Galaxy tool definition file and wrapper scripts for the [MSGF+ Search Engine](http://proteomics.ucsd.edu/Software/MSGFPlus.html).
-
-## Installation
-Install from the main galaxy toolshed at http://toolshed.g2.bx.psu.edu/
-
-Depends on command-line scripts and databases available in the [protk ruby gem](https://bitbucket.org/iracooke/protk). 
-
--- a/msgfplus_search.xml	Tue Mar 18 19:46:10 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,188 +0,0 @@
-<tool id="proteomics_search_msgfplus_1" name="MSGF+ MSMS Search" version="1.0.2">
-    <description>Run an MSGF+ Search</description>
-    <requirements>
-        <requirement type="package" version="1.2.6">protk</requirement>
-        <requirement type="package" version="20140210">msgfplus</requirement>
-        <requirement type="package" version="3_0_4388">proteowizard</requirement>
-    </requirements>
-    <command>
-        msgfplus_search.rb
-        #if $database.source_select=="built_in":
-         --galaxy -d $database.dbkey
-        #else
-        --galaxy -d $database.fasta_file
-        #end if
-
-        --var-mods='
-        $variable_mods
-        #for $custom_variable_mod in $custom_variable_mods:
-        ,${custom_variable_mod.custom_mod}
-        #end for
-        '
-
-        --fix-mods='
-        $fixed_mods
-        #for $custom_fix_mod in $custom_fix_mods:
-        ,${custom_fix_mod.custom_mod}
-        #end for
-        '
-
-        $input_file 
-        -o $output 
-        -r 
-        --enzyme=$enzyme 
-        --precursor-ion-tol-units=$precursor_tolu 
-        -v $missed_cleavages
-        $cleavage_semi
-        -f $fragment_ion_tol 
-        -p $precursor_ion_tol 
-        --instrument=$instrument
-        --isotope-error-range=$isotope_error_range
-        --fragment-method=$fragment_method
-        --protocol=$protocol
-        --min-pep-len=$min_pep_len
-        --max-pep-len=$max_pep_len
-        --max-pep-charge=$max_pep_charge
-        --min-pep-charge=$min_pep_charge
-        --num-reported-matches=$num_reported_matches
-        --java-mem=$java_mem
-        #unless $pepxml_output_use:
-        --no-pepxml
-        #end unless
-    </command>
-
-    <inputs>
-        <conditional name="database">
-            <param name="source_select" type="select" label="Database source">
-                <option value="built_in">Built-In</option>
-                <option value="input_ref">Your Upload File</option>
-            </param>
-            <when value="built_in">
-                <param name="dbkey" type="select" format="text" >
-                    <label>Database</label>
-                    <options from_file="pepxml_databases.loc">
-                        <column name="name" index="0" />
-                        <column name="value" index="2" />
-                    </options>
-                </param>
-            </when>
-            <when value="input_ref">
-                <param name="fasta_file" type="data" format="fasta" label="Uploaded FASTA file" />
-            </when>
-        </conditional>
-        <param name="input_file" type="data" format="mzml" multiple="false" label="MSMS File" help="An mzML file with MS/MS data"/>
-        <param name="variable_mods" format="text" type="select" multiple="true" label="Variable Modifications" help="Multiple Selection Allowed">
-            <options from_file="msgfplus_mods.loc">
-                <column name="name" index="0" />
-                <column name="value" index="2" />
-            </options>
-        </param>
-        <repeat name="custom_variable_mods" title="Custom Variable Modifications" 
-            help="See https://bix-lab.ucsd.edu/pages/viewpage.action?pageId=13533355 for details on how to create these">
-            <param name="custom_mod" type="text" />
-        </repeat>
-        <param name="fixed_mods" format="text" type="select" multiple="true" label="Fixed Modifications" help="Multiple Selection Allowed">
-            <options from_file="msgfplus_mods.loc">
-                <column name="name" index="0" />
-                <column name="value" index="2" />
-            </options>
-        </param>
-        <repeat name="custom_fix_mods" title="Custom Fixed Modifications" help="See https://bix-lab.ucsd.edu/pages/viewpage.action?pageId=13533355 for details on how to create these">
-            <param name="custom_mod" type="text">
-            </param>
-        </repeat>
-        <param name="missed_cleavages" type="select" format="text" help="Allow peptides to contain up to this many missed enzyme cleavage sites">
-            <label>Missed Cleavages Allowed</label>
-            <option value="0">0</option>
-            <option value="1">1</option>
-            <option value="2">2</option>
-        </param>
-        
-        <param name="cleavage_semi" type="boolean" label="Allow semi-cleaved peptides" help="This can increase search time dramatically" truevalue="" falsevalue="--no-cleavage-semi" />
-
-        <param name="enzyme" type="select" format="text">
-            <label>Enzyme</label>
-            <option value="0">unspecific cleavage</option>
-            <option value="1">Trypsin</option>
-            <option value="2">Chymotrypsin</option>
-            <option value="3">Lys-C</option>
-            <option value="4">Lys-N</option>
-            <option value="5">glutamyl endopeptidase</option>
-            <option value="6">Arg-C</option>
-            <option value="7">Asp-N</option>
-            <option value="8">alphaLP</option>
-            <option value="9">no cleavage</option>
-        </param>
-        
-        <param name="instrument" type="select" format="text">
-            <label>Instrument Type</label>
-            <option value="2">TOF</option>
-            <option value="0">Low-res LCQ/LTQ</option>
-            <option value="1">High-res LTQ</option>
-        </param>
-
-        <param name="fragment_method" type="select" format="text">
-            <label>Fragmentation Method</label>
-            <option value="0">Respect Input File</option>
-            <option value="1">CID</option>
-            <option value="2">ETD</option>
-            <option value="3">HCD</option>
-            <option value="4">Merge spectra from same precursor</option>
-        </param>
-
-        <param name="protocol" type="select" format="text">
-            <label>Protocol</label>
-            <option value="0">NoProtocol</option>
-            <option value="1">Phosphorylation</option>
-            <option value="2">iTRAQ</option>
-            <option value="3">iTRAQPhospho</option>
-        </param>
-
-        <param name="fragment_ion_tol" help="Fragment Ion Tolerance in Daltons" type="float" value="0.65" min="0" max="10000" label="Fragment ion tolerance"/>
-
-        <param name="precursor_ion_tol" help="Precursor Ion Tolerance (Da or ppm)" type="float" value="100" min="0" max="10000" label="Precursor ion tolerance"/>
-        <param name="precursor_tolu" type="select" format="text">
-            <label>Precursor Ion Tolerance Units</label>
-            <option value="ppm">ppm</option>
-            <option value="Da">Da</option>
-        </param>
-
-        <param name="isotope_error_range" help="Takes into account of the error introduced by chooosing a non-monoisotopic peak for fragmentation." type="text" size="80" value="0,1" label="Isotope Error Range"/>
-        <param name="min_pep_len" help="" type="integer" value="6" label="Minimum Peptide Length"/>
-        <param name="max_pep_len" help="" type="integer" value="40" label="Maximum Peptide Length"/>
-        <param name="min_pep_charge" help="" type="integer" value="2" label="Minimum Peptide Charge"/>
-        <param name="max_pep_charge" help="" type="integer" value="3" label="Maximum Peptide Charge"/>
-        <param name="num_reported_matches" help="Number of matches per spectrum to be reported" type="integer" value="1" label="Num reported matches"/>
-        <param name="java_mem" help="Increase this value if you get out of memory errors" type="text" size="80" value="3500M" label="Java Memory Limit"/>
-        <param name="pepxml_output_use" type="boolean" label="Convert results to pepXML" help="" truevalue="true" falsevalue="false" />
-    </inputs>
-    <outputs>
-        <data format="mzid" name="output" metadata_source="input_file" label="MSGF+_vs_${database.dbkey if $database.has_key('dbkey') else $database.fasta_file.display_name}.${input_file.display_name}.${input_file.display_name}">
-            <change_format>
-                <when input="pepxml_output_use" value="true" format="raw_pepxml" metadata_source="input_file" 
-                    label="MSGF+_vs_${database.dbkey if $database.has_key('dbkey') else $database.fasta_file.display_name}.${input_file.display_name}.${input_file.display_name}"/>
-            </change_format>
-        </data>
-    </outputs>
-    <tests>
-        <test>
-            <param name="source_select" value="input_ref"/>
-              <param name="fasta_file" value="bsa.fasta"/>
-                  <param name="input_file" value="bsa.mzML"/>
-              <output name="output" file="bsa.mzid" compare="sim_size" delta="600" /> 
-        </test>
-    </tests>
-    <help>
-
-**What it does**
-
-Runs an MS/MS database search using the MSGFPlus search engine. Output is in the form of a pepXML file containing identified peptides along with their raw search scores.
-
-----
-
-**References**
-
-Please see http://proteomics.ucsd.edu/Software/MSGFPlus.html for details of the MSGFPlus search engine and references describing its algorithm
-
-    </help>
-</tool>
--- a/repository_dependencies.xml	Tue Mar 18 19:46:10 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-<?xml version="1.0"?>
-<repositories description="Proteomics datatypes, MSGF+ and Protk">
-    <repository changeset_revision="7101f7e4b00b" name="proteomics_datatypes" owner="iracooke" toolshed="http://testtoolshed.g2.bx.psu.edu" />
- </repositories>
--- a/test-data/bsa.fasta	Tue Mar 18 19:46:10 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,26 +0,0 @@
->sp|ALBU_BOVIN|
-MKWVTFISLLLLFSSAYSRGVFRRDTHKSEIAHRFKDLGEEHFKGLVLIA
-FSQYLQQCPFDEHVKLVNELTEFAKTCVADESHAGCEKSLHTLFGDELCK
-VASLRETYGDMADCCEKQEPERNECFLSHKDDSPDLPKLKPDPNTLCDEF
-KADEKKFWGKYLYEIARRHPYFYAPELLYYANKYNGVFQECCQAEDKGAC
-LLPKIETMREKVLASSARQRLRCASIQKFGERALKAWSVARLSQKFPKAE
-FVEVTKLVTDLTKVHKECCHGDLLECADDRADLAKYICDNQDTISSKLKE
-CCDKPLLEKSHCIAEVEKDAIPENLPPLTADFAEDKDVCKNYQEAKDAFL
-GSFLYEYSRRHPEYAVSVLLRLAKEYEATLEECCAKDDPHACYSTVFDKL
-KHLVDEPQNLIKQNCDQFEKLGEYGFQNALIVRYTRKVPQVSTPTLVEVS
-RSLGKVGTRCCTKPESERMPCTEDYLSLILNRLCVLHEKTPVSEKVTKCC
-TESLVNRRPCFSALTPDETYVPKAFDEKLFTFHADICTLPDTEKQIKKQT
-ALVELLKHKPKATEEQLKTVMENFVAFVDKCCAADDKEACFAVEGPKLVV
-STQTALA
->sp|AMYS_HUMAN|
-MKLFWLLFTIGFCWAQYSSNTQQGRTSIVHLFEWRWVDIALECERYLAPK
-GFGGVQVSPPNENVAIHNPFRPWWERYQPVSYKLCTRSGNEDEFRNMVTR
-CNNVGVRIYVDAVINHMCGNAVSAGTSSTCGSYFNPGSRDFPAVPYSGWD
-FNDGKCKTGSGDIENYNDATQVRDCRLSGLLDLALGKDYVRSKIAEYMNH
-LIDIGVAGFRIDASKHMWPGDIKAILDKLHNLNSNWFPEGSKPFIYQEVI
-DLGGEPIKSSDYFGNGRVTEFKYGAKLGTVIRKWNGEKMSYLKNWGEGWG
-FMPSDRALVFVDNHDNQRGHGAGGASILTFWDARLYKMAVGFMLAHPYGF
-TRVMSSYRWPRYFENGKDVNDWVGPPNDNGVTKEVTINPDTTCGNDWVCE
-HRWRQIRNMVNFRNVVDGQPFTNWYDNGSNQVAFGRGNRGFIVFNNDDWT
-FSLTLQTGLPAGTYCDVISGDKINGNCTGIKIYVSDDGKAHFSISNSAED
-PFIAIHAESKL
\ No newline at end of file
--- a/test-data/bsa.mzML	Tue Mar 18 19:46:10 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,96 +0,0 @@
-<?xml version="1.0" encoding="ISO-8859-1"?>
-<indexedmzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.1_idx.xsd">
-  <mzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd" id="bsa" version="1.1.0">
-    <cvList count="2">
-      <cv id="MS" fullName="Proteomics Standards Initiative Mass Spectrometry Ontology" version="3.30.0" URI="http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo"/>
-      <cv id="UO" fullName="Unit Ontology" version="12:10:2011" URI="http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo"/>
-    </cvList>
-    <fileDescription>
-      <fileContent>
-        <cvParam cvRef="MS" accession="MS:1000580" name="MSn spectrum" value=""/>
-        <cvParam cvRef="MS" accession="MS:1000127" name="centroid spectrum" value=""/>
-      </fileContent>
-      <sourceFileList count="1">
-        <sourceFile id="bsa.mgf" name="bsa.mgf" location="file:///">
-          <cvParam cvRef="MS" accession="MS:1000774" name="multiple peak list nativeID format" value=""/>
-          <cvParam cvRef="MS" accession="MS:1001062" name="Mascot MGF file" value=""/>
-          <cvParam cvRef="MS" accession="MS:1000569" name="SHA-1" value="92c6fa5d4ff59e2124950546a651bb9a85bbfd92"/>
-        </sourceFile>
-      </sourceFileList>
-    </fileDescription>
-    <softwareList count="1">
-      <software id="pwiz_3.0.4140" version="3.0.4140">
-        <cvParam cvRef="MS" accession="MS:1000615" name="ProteoWizard" value=""/>
-      </software>
-    </softwareList>
-    <instrumentConfigurationList count="1">
-      <instrumentConfiguration id="IC">
-        <cvParam cvRef="MS" accession="MS:1000031" name="instrument model" value=""/>
-      </instrumentConfiguration>
-    </instrumentConfigurationList>
-    <dataProcessingList count="1">
-      <dataProcessing id="pwiz_Reader_conversion">
-        <processingMethod order="0" softwareRef="pwiz_3.0.4140">
-          <cvParam cvRef="MS" accession="MS:1000544" name="Conversion to mzML" value=""/>
-        </processingMethod>
-        <processingMethod order="1" softwareRef="pwiz_3.0.4140">
-          <userParam name="most intense count (excluding ties at the threshold)" value="100"/>
-        </processingMethod>
-      </dataProcessing>
-    </dataProcessingList>
-    <run id="bsa" defaultInstrumentConfigurationRef="IC">
-      <spectrumList count="1" defaultDataProcessingRef="pwiz_Reader_conversion">
-        <spectrum index="0" id="index=0" defaultArrayLength="97" dataProcessingRef="pwiz_Reader_conversion">
-          <cvParam cvRef="MS" accession="MS:1000580" name="MSn spectrum" value=""/>
-          <cvParam cvRef="MS" accession="MS:1000511" name="ms level" value="2"/>
-          <cvParam cvRef="MS" accession="MS:1000127" name="centroid spectrum" value=""/>
-          <cvParam cvRef="MS" accession="MS:1000796" name="spectrum title" value="Cmpd 1, +MSn(722.3522), 33.5 min"/>
-          <cvParam cvRef="MS" accession="MS:1000130" name="positive scan" value=""/>
-          <cvParam cvRef="MS" accession="MS:1000528" name="lowest observed m/z" value="198.13402"/>
-          <cvParam cvRef="MS" accession="MS:1000527" name="highest observed m/z" value="1280.62643"/>
-          <cvParam cvRef="MS" accession="MS:1000285" name="total ion current" value="21166.0"/>
-          <cvParam cvRef="MS" accession="MS:1000504" name="base peak m/z" value="249.17284"/>
-          <cvParam cvRef="MS" accession="MS:1000505" name="base peak intensity" value="2164.0"/>
-          <scanList count="1">
-            <cvParam cvRef="MS" accession="MS:1000795" name="no combination" value=""/>
-            <scan>
-            </scan>
-          </scanList>
-          <precursorList count="1">
-            <precursor>
-              <selectedIonList count="1">
-                <selectedIon>
-                  <cvParam cvRef="MS" accession="MS:1000744" name="selected ion m/z" value="722.35225" unitCvRef="MS" unitAccession="MS:1000040" unitName="m/z"/>
-                  <cvParam cvRef="MS" accession="MS:1000041" name="charge state" value="2"/>
-                </selectedIon>
-              </selectedIonList>
-              <activation>
-              </activation>
-            </precursor>
-          </precursorList>
-          <binaryDataArrayList count="2">
-            <binaryDataArray encodedLength="1036">
-              <cvParam cvRef="MS" accession="MS:1000523" name="64-bit float" value=""/>
-              <cvParam cvRef="MS" accession="MS:1000576" name="no compression" value=""/>
-              <cvParam cvRef="MS" accession="MS:1000514" name="m/z array" value="" unitCvRef="MS" unitAccession="MS:1000040" unitName="m/z"/>
-              <binary>RPrt68CEaUDI7236swRrQLecS3FVwmxAdcjNcANFbUBlwi/182NuQL/xtWeWAm9AGm7A54clb0BNSkG3FyJxQJOMnIU9QXFAFCLgEKpScUCe6pCb4fJyQNbiUwAME3RA529CIQJSdUDmllZDYmJ2QFch5SfVQXdAOiNKe4NRd0A2WaMeolJ4QGwE4nX9YXhAG7tE9VZyeUAaNPRPcNN6QDyInSl0JHtAEqCmli1Te0CuDRXj/JJ7QBsS91h6gnxAet/42jOzfEC94xQdyZJ9QEbT2clgMn5Ah78ma9RCf0CAYI4ev1J/QPCiryCNKoBAqiuf5XkxgEDtuyL4H3qAQDI4Sl7dgYBAEVMiiZ66gEBi+IiY0tGAQDMbZJKR2YBAjL6CNCMKgUA0ETY8vUGBQEymCkYlUoFAzzEge71hgUB8D5cc97WBQKYnLPHAuYFArDlAMEfigUBWSPlJ9fGBQP2H9NsX/oFApMLYQlApgkBmFMstLUKCQG2tLxLaWYJAMPDce3ixgkAYsrrVc7mCQHAlOzbCOYNAPrMkQI1Bg0Ang6PkFWqDQOrPfqRIgoNAdZMYBBaKg0BaL4ZyIrqDQGIQWDm0woNANUHUfcDJg0AMAiuHVgqEQCzUmubdUoRAL6NYbilqhEDXwFYJlpqEQGjon+CiooRA9Pi9TT/ShEBrZcIvNdqEQBqjdVR16oRAs5jYfBzyhEBx5ldzwFGFQIz4Tsx6eoVAhPBo4wiKhUCCHJQwE5KFQCUGgZUD0oVAy9b6IuHZhUBeukkMAmKGQM11Gmkp8oZA1xcJbTnDh0AWwf9WMsuHQJhRLLd0SohAldQJaGJTiEAUyy2thoKIQNuizAbZColAjZyFPa0SiUDLSpNSkGKJQCveyDzymolAS80eaMXqiUB47j1csvKJQONTAIzneopAejarPlfTikAMPPcerlOLQDdPdciNW4tA1EM0usPji0CY3ZOHBauMQD90QX3L845A/+xHigh8j0D7OnDOKPaRQAUXK2oQ+pFAbqMBvCU+kkA=</binary>
-            </binaryDataArray>
-            <binaryDataArray encodedLength="520">
-              <cvParam cvRef="MS" accession="MS:1000521" name="32-bit float" value=""/>
-              <cvParam cvRef="MS" accession="MS:1000576" name="no compression" value=""/>
-              <cvParam cvRef="MS" accession="MS:1000515" name="intensity array" value="" unitCvRef="MS" unitAccession="MS:1000131" unitName="number of counts"/>
-              <binary>AABIQgAAUEIAAMpCAABkQwAA8EEAACBCAEAHRQAAgEIAACRDAICRQwAAhEIAAE9EAADwQQAAjkIAAAhCAAAEQgAAMEIAAChDAACmQgAA4EEAAJBDAADGQgAA1EIAAHhCAAAEQwAAHEMAAKpCAAAcQgAAgUMAALRCAIDuQwAADEIAAJxCAMCERAAAOEIAADlDAAAAQgAA3kIAAPxCAICfQwAASUMAAAtDAADoQQAA00MAgMNDAADoQQAA+UMAAOBBAABUQgAAH0MAAExCAICFQwAA+EEAABxCAACMQgAAIEIAAKhCAIDVQwAAKEMAAFpEAAAIQgAABEIAAOBBAAAMQgAA4EEAAIpCAAA1QwAA+EIAAFBCAADoQQAAEEIAACRCAIC8QwAAFUQAAKRDAAAkQgAAgUMAAAhCAIDFQwAA9kIAAOhBAADEQgAAFEMAAAxCAADUQgAAO0MAAApDAADgQQAA8EEAAANDAEAmRAAACEIAAL5CAAASRAAAAEIAABFDAEBQRA==</binary>
-            </binaryDataArray>
-          </binaryDataArrayList>
-        </spectrum>
-      </spectrumList>
-    </run>
-  </mzML>
-  <indexList count="2">
-    <index name="spectrum">
-      <offset idRef="index=0">2685</offset>
-    </index>
-  </indexList>
-  <indexListOffset>7058</indexListOffset>
-  <fileChecksum>b76171188a63cfad075d3738a172b15f1f9e0c4c</fileChecksum>
-</indexedmzML>
--- a/test-data/bsa.mzid	Tue Mar 18 19:46:10 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,123 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<MzIdentML id="MS-GF+" version="1.1.0" xmlns="http://psidev.info/psi/pi/mzIdentML/1.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psidev.info/psi/pi/mzIdentML/1.1 http://www.psidev.info/files/mzIdentML1.1.0.xsd" creationDate="2013-06-05T16:52:19" >
-<cvList xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
-    <cv id="PSI-MS" uri="http://psidev.cvs.sourceforge.net/viewvc/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo" version="3.30.0" fullName="PSI-MS"/>
-    <cv id="UNIMOD" uri="http://www.unimod.org/obo/unimod.obo" fullName="UNIMOD"/>
-    <cv id="UO" uri="http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo" fullName="UNIT-ONTOLOGY"/>
-</cvList>
-<AnalysisSoftwareList xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
-    <AnalysisSoftware version="Beta (v9517)" name="MS-GF+" id="ID_software">
-        <SoftwareName>
-            <cvParam accession="MS:1002048" cvRef="PSI-MS" name="MS-GF+"/>
-        </SoftwareName>
-    </AnalysisSoftware>
-</AnalysisSoftwareList>
-<SequenceCollection xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
-    <DBSequence accession="sp|ALBU_BOVIN|" searchDatabase_ref="SearchDB_1" length="607" id="DBSeq1">
-        <cvParam accession="MS:1001088" cvRef="PSI-MS" value="sp|ALBU_BOVIN|" name="protein description"/>
-    </DBSequence>
-    <Peptide id="Pep1">
-        <PeptideSequence>YICDNQDTISSK</PeptideSequence>
-        <Modification monoisotopicMassDelta="57.021463735" location="3">
-            <cvParam accession="UNIMOD:4" cvRef="UNIMOD" name="Carbamidomethyl"/>
-        </Modification>
-    </Peptide>
-    <PeptideEvidence isDecoy="false" post="L" pre="K" end="297" start="286" peptide_ref="Pep1" dBSequence_ref="DBSeq1" id="PepEv_286_1_286"/>
-</SequenceCollection>
-<AnalysisCollection xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
-    <SpectrumIdentification spectrumIdentificationList_ref="SI_LIST_1" spectrumIdentificationProtocol_ref="SearchProtocol_1" id="SpecIdent_1">
-        <InputSpectra spectraData_ref="SID_1"/>
-        <SearchDatabaseRef searchDatabase_ref="SearchDB_1"/>
-    </SpectrumIdentification>
-</AnalysisCollection>
-<AnalysisProtocolCollection xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
-    <SpectrumIdentificationProtocol analysisSoftware_ref="ID_software" id="SearchProtocol_1">
-        <SearchType>
-            <cvParam accession="MS:1001083" cvRef="PSI-MS" name="ms-ms search"/>
-        </SearchType>
-        <AdditionalSearchParams>
-            <cvParam accession="MS:1001211" cvRef="PSI-MS" name="parent mass type mono"/>
-            <cvParam accession="MS:1001256" cvRef="PSI-MS" name="fragment mass type mono"/>
-            <userParam value="false" name="TargetDecoyApproach"/>
-            <userParam value="0" name="MinIsotopeError"/>
-            <userParam value="1" name="MaxIsotopeError"/>
-            <userParam value="As written in the spectrum or CID if no info" name="FragmentMethod"/>
-            <userParam value="LowRes" name="Instrument"/>
-            <userParam value="NoProtocol" name="Protocol"/>
-            <userParam value="2" name="NumTolerableTermini"/>
-            <userParam value="1" name="NumMatchesPerSpec"/>
-            <userParam value="6" name="MinPepLength"/>
-            <userParam value="40" name="MaxPepLength"/>
-            <userParam value="2" name="MinCharge"/>
-            <userParam value="3" name="MaxCharge"/>
-        </AdditionalSearchParams>
-        <ModificationParams>
-            <SearchModification residues="C" massDelta="57.021465" fixedMod="true">
-                <cvParam accession="UNIMOD:4" cvRef="UNIMOD" name="Carbamidomethyl"/>
-            </SearchModification>
-        </ModificationParams>
-        <Enzymes>
-            <Enzyme missedCleavages="1000" semiSpecific="false" id="Tryp">
-                <EnzymeName>
-                    <cvParam accession="MS:1001251" cvRef="PSI-MS" name="Trypsin"/>
-                </EnzymeName>
-            </Enzyme>
-        </Enzymes>
-        <ParentTolerance>
-            <cvParam accession="MS:1001412" cvRef="PSI-MS" unitCvRef="UO" unitName="parts per million" unitAccession="UO:0000169" value="200.0" name="search tolerance plus value"/>
-            <cvParam accession="MS:1001413" cvRef="PSI-MS" unitCvRef="UO" unitName="parts per million" unitAccession="UO:0000169" value="200.0" name="search tolerance minus value"/>
-        </ParentTolerance>
-        <Threshold>
-            <cvParam accession="MS:1001494" cvRef="PSI-MS" name="no threshold"/>
-        </Threshold>
-    </SpectrumIdentificationProtocol>
-</AnalysisProtocolCollection>
-<DataCollection xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
-    <Inputs>
-        <SearchDatabase numDatabaseSequences="2" location="/Users/iracooke/Sources/protk-wk/bsa.fasta" id="SearchDB_1">
-            <FileFormat>
-                <cvParam accession="MS:1001348" cvRef="PSI-MS" name="FASTA format"/>
-            </FileFormat>
-            <DatabaseName>
-                <userParam name="bsa.fasta"/>
-            </DatabaseName>
-        </SearchDatabase>
-        <SpectraData location="/Users/iracooke/Sources/protk-wk/bsa.mzML" name="bsa.mzML" id="SID_1">
-            <FileFormat>
-                <cvParam accession="MS:1000584" cvRef="PSI-MS" name="mzML file"/>
-            </FileFormat>
-            <SpectrumIDFormat>
-                <cvParam accession="MS:1000774" cvRef="PSI-MS" name="multiple peak list nativeID format"/>
-            </SpectrumIDFormat>
-        </SpectraData>
-    </Inputs>
-    <AnalysisData>
-        <SpectrumIdentificationList id="SI_LIST_1">
-            <FragmentationTable>
-                <Measure id="Measure_MZ">
-                    <cvParam accession="MS:1001225" cvRef="PSI-MS" unitCvRef="PSI-MS" unitName="m/z" unitAccession="MS:1000040" name="product ion m/z"/>
-                </Measure>
-            </FragmentationTable>
-            <SpectrumIdentificationResult spectraData_ref="SID_1" spectrumID="index=0" id="SIR_1">
-                <SpectrumIdentificationItem passThreshold="true" rank="1" peptide_ref="Pep1" calculatedMassToCharge="722.3251953125" experimentalMassToCharge="722.3522338867188" chargeState="2" id="SII_1_1">
-                    <PeptideEvidenceRef peptideEvidence_ref="PepEv_286_1_286"/>
-                    <cvParam accession="MS:1002049" cvRef="PSI-MS" value="21" name="MS-GF:RawScore"/>
-                    <cvParam accession="MS:1002050" cvRef="PSI-MS" value="71" name="MS-GF:DeNovoScore"/>
-                    <cvParam accession="MS:1002052" cvRef="PSI-MS" value="2.0246382E-9" name="MS-GF:SpecEValue"/>
-                    <cvParam accession="MS:1002053" cvRef="PSI-MS" value="2.2635456E-6" name="MS-GF:EValue"/>
-                    <userParam value="0" name="IsotopeError"/>
-                    <userParam value="CID" name="AssumedDissociationMethod"/>
-                    <userParam value="0.5818614" name="ExplainedIonCurrentRatio"/>
-                    <userParam value="0.12558742" name="NTermIonCurrentRatio"/>
-                    <userParam value="0.45627397" name="CTermIonCurrentRatio"/>
-                    <userParam value="18513.0" name="MS2IonCurrent"/>
-                    <userParam value="61.02686" name="MeanErrorAll"/>
-                    <userParam value="64.35494" name="StdevErrorAll"/>
-                    <userParam value="88.56576" name="MeanErrorTop7"/>
-                    <userParam value="86.03032" name="StdevErrorTop7"/>
-                </SpectrumIdentificationItem>
-            </SpectrumIdentificationResult>
-        </SpectrumIdentificationList>
-    </AnalysisData>
-</DataCollection>
-</MzIdentML>
--- a/tool-data/msgfplus_mods.loc.sample	Tue Mar 18 19:46:10 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,50 +0,0 @@
-#This file lists the names of inbuilt chemical modifications accepted by msgfplus
-#Each entry consists of 4 tab separated fields like this
-#<Displayed Name>	<DBKey>	<Modification String>	<DBKey>
-#
-#Modification strings should conform to the standard MSGFPlus syntax with the following exception
-#The ModType field is overridden by the msgfplus_search.rb tool. In other words, any of these mods
-#may be passed to the tool as a variable or fixed mod, and the tool will substitute the appropriate ModType
-#value.
-#
-#Standard MSGFPlus syntax is
-#
-# To input a modification, use the following command:
-# Mass or CompositionStr, Residues, ModType, Position, Name (all the five fields are required).
-# CompositionStr (C[Num]H[Num]N[Num]O[Num]S[Num]P[Num])
-# 	- C (Carbon), H (Hydrogen), N (Nitrogen), O (Oxygen), S (Sulfur) and P (Phosphorus) are allowed.
-# 	- Atom can be omitted. The sequence of atoms must be followed. 
-# 	- Negative numbers are allowed.
-# 	- E.g. C2H2O1 (valid), H2C1O1 (invalid) 
-# Mass can be used instead of CompositionStr. It is important to specify accurate masses (integer masses are insufficient).
-# 	- E.g. 15.994915 
-# Residues: affected amino acids (must be uppercase letters)
-# 	- Must be uppercase letters or *
-# 	- Use * if this modification is applicable to any residue. 
-# 	- * should not be "anywhere" modification (e.g. "15.994915, *, opt, any, Oxidation" is not allowed.) 
-# 	- E.g. NQ, *
-# ModType: "fix" for fixed modifications, "opt" for variable modifications (case insensitive)
-# Position: position in the peptide where the modification can be attached. 
-# 	- One of the following five values should be used:
-# 	- any (anywhere), N-term (peptide N-term), C-term (peptide C-term), Prot-N-term (protein N-term), Prot-C-term (protein C-term) 
-# 	- Case insensitive
-# 	- "-" can be omitted
-# 	- E.g. any, Any, Prot-n-Term, ProtNTerm => all valid
-# Name: name of the modification (Unimod PSI-MS name)
-# 	- For proper mzIdentML output, this name should be the same as the Unimod PSI-MS name
-# 	- E.g. Phospho, Acetyl
-#C2H3N1O1,C,fix,any,Carbamidomethyl 		# Fixed Carbamidomethyl C
-# Variable Modifications (default: none)
-#O1,M,opt,any,Oxidation				# Oxidation M
-#15.994915,M,opt,any,Oxidation			# Oxidation M (mass is used instead of CompositionStr)
-#H-1N-1O1,NQ,opt,any,Deamidated			# Negative numbers are allowed.
-#C2H3NO,*,opt,N-term,Carbamidomethyl		# Variable Carbamidomethyl N-term
-#H-2O-1,E,opt,N-term,Pyro_glu			# Pyro-glu from E
-#H-3N-1,Q,opt,N-term,Pyro-glu			# Pyro-glu from Q
-#C2H2O,*,opt,Prot-N-term,Acetyl			# Acetylation Protein N-term
-#C2H2O1,K,opt,any,Acetyl			# Acetylation K
-#CH2,K,opt,any,Methy				# Methylation K
-#HO3P,STY,opt,any,Phospho			# Phosphorylation STY
-
-Carbamidomethyl C	carbamidomethyl_c_	C2H3N1O1,C,opt,any,Carbamidomethyl	carbamidomethyl_c_
-Oxidation M	oxidation_m_	O1,M,opt,any,Oxidation	oxidation_m_
\ No newline at end of file
--- a/tool-data/pepxml_databases.loc.sample	Tue Mar 18 19:46:10 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,13 +0,0 @@
-#This file lists the names of protein databases installed locally in protk. 
-# These are used by omssa and x!tandem as well as the "mascot to pepxml" tool
-# In order to combine search results with Interprophet, searches must be run against an identical database
-#
-# Entries should be structured as follows
-# Display_name omssa_tandem_dbname dbkey
-#
-#
-Swissprot	spall_	spall	spall_
-Combined PlasmboDB (falciparum) and Swissprot Human	plasmodb_pfalciparum_sphuman_	plasmodb_pfalciparum_sphuman	plasmodb_pfalciparum_sphuman_
-Swissprot Human	sphuman_	sphuman	sphuman_
-Combined Swissprot/TRembl Human	sptrhuman_	sptrhuman	sptrhuman_
-Swissprot Mouse	spmouse_	spmouse	spmouse_
--- a/tool_dependencies.xml	Tue Mar 18 19:46:10 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="protk" version="1.2.6">
-         <repository changeset_revision="1b21ba7de3e7" name="package_protk_1_2_6" owner="iracooke" prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="proteowizard" version="3_0_4388">
-         <repository changeset_revision="a2cbef24e9fd" name="package_proteowizard_3_0_4388" owner="iracooke" toolshed="http://testtoolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="msgfplus" version="20140210">
-         <repository changeset_revision="7d614a857e51" name="package_msgfplus_20140210" owner="iracooke" toolshed="http://testtoolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>