Mercurial > repos > iracooke > msgfplus
changeset 0:a84952540969 draft
Uploaded
author | iracooke |
---|---|
date | Mon, 03 Mar 2014 19:05:35 -0500 |
parents | |
children | d3994c3ec487 |
files | README README.md msgfplus_search.xml repository_dependencies.xml test-data/bsa.fasta test-data/bsa.mzML test-data/bsa.mzid tool-data/msgfplus_mods.loc.sample tool-data/pepxml_databases.loc.sample tool_dependencies.xml |
diffstat | 10 files changed, 524 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README Mon Mar 03 19:05:35 2014 -0500 @@ -0,0 +1,7 @@ +This package is a Galaxy wrapper for the MSGF+ search tool. + +Requirements: +This package depends on the galaxy_protk, protk_msgfplus and protk_proteowizard packages. +Please see the instructions for those packages before installing. + +In addition to these basic requirements, you must also have unzip and a Java 6 (or later) runtime installed.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Mon Mar 03 19:05:35 2014 -0500 @@ -0,0 +1,8 @@ +## What is it? +Galaxy tool definition file and wrapper scripts for the [MSGF+ Search Engine](http://proteomics.ucsd.edu/Software/MSGFPlus.html). + +## Installation +Install from the main galaxy toolshed at http://toolshed.g2.bx.psu.edu/ + +Depends on command-line scripts and databases available in the [protk ruby gem](https://bitbucket.org/iracooke/protk). +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/msgfplus_search.xml Mon Mar 03 19:05:35 2014 -0500 @@ -0,0 +1,185 @@ +<tool id="proteomics_search_msgfplus_1" name="MSGF+ MSMS Search" version="1.0.1"> + <description>Run an MSGF+ Search</description> + <requirements> + <requirement type="package" version="1.2.6">protk</requirement> + <requirement type="package" version="20140210">msgfplus</requirement> + <requirement type="package" version="3_0_4388">proteowizard</requirement> + </requirements> + <command> + msgfplus_search.rb + #if $database.source_select=="built_in": + --galaxy -d $database.dbkey + #else + --galaxy -d $database.fasta_file + #end if + + --var-mods=' + $variable_mods + #for $custom_variable_mod in $custom_variable_mods: + ,${custom_variable_mod.custom_mod} + #end for + ' + + --fix-mods=' + $fixed_mods + #for $custom_fix_mod in $custom_fix_mods: + ,${custom_fix_mod.custom_mod} + #end for + ' + $input_file + -o $output + -r + --enzyme=$enzyme + --precursor-ion-tol-units=$precursor_tolu + -v $missed_cleavages + -f $fragment_ion_tol + -p $precursor_ion_tol + --instrument=$instrument + --isotope-error-range=$isotope_error_range + --fragment-method=$fragment_method + --protocol=$protocol + --min-pep-len=$min_pep_len + --max-pep-len=$max_pep_len + --max-pep-charge=$max_pep_charge + --min-pep-charge=$min_pep_charge + --num-reported-matches=$num_reported_matches + --java-mem=$java_mem + #if $pepxml_output_use + #else + --no-pepxml + #end if + </command> + <!-- Every input name declared below is referenced as a $variable by the msgfplus_search.rb command template above; the database conditional selects between a built-in dbkey and an uploaded FASTA file. --> + <inputs> + <conditional name="database"> + <param name="source_select" type="select" label="Database source"> + <option value="built_in">Built-In</option> + <option value="input_ref">Your Upload File</option> + </param> + <when value="built_in"> + <param name="dbkey" type="select" format="text" > + <label>Database</label> + <options from_file="pepxml_databases.loc"> + <column name="name" index="0" /> + <column name="value" index="2" /> + </options> + </param> + </when> + <when value="input_ref"> + 
<param name="fasta_file" type="data" format="fasta" label="Uploaded FASTA file" /> + </when> + </conditional> + <param name="input_file" type="data" format="mzml" multiple="false" label="MSMS File" help="An mzML file with MS/MS data"/> + <param name="variable_mods" format="text" type="select" multiple="true" label="Variable Modifications" help="Multiple Selection Allowed"> + <options from_file="msgfplus_mods.loc"> + <column name="name" index="0" /> + <column name="value" index="2" /> + </options> + </param> + <repeat name="custom_variable_mods" title="Custom Variable Modifications" + help="See https://bix-lab.ucsd.edu/pages/viewpage.action?pageId=13533355 for details on how to create these"> + <param name="custom_mod" type="text" /> + </repeat> + <param name="fixed_mods" format="text" type="select" multiple="true" label="Fixed Modifications" help="Multiple Selection Allowed"> + <options from_file="msgfplus_mods.loc"> + <column name="name" index="0" /> + <column name="value" index="2" /> + </options> + </param> + <repeat name="custom_fix_mods" title="Custom Fixed Modifications" help="See https://bix-lab.ucsd.edu/pages/viewpage.action?pageId=13533355 for details on how to create these"> + <param name="custom_mod" type="text"> + </param> + </repeat> + <param name="missed_cleavages" type="select" format="text" help="Allow peptides to contain up to this many missed enzyme cleavage sites"> + <label>Missed Cleavages Allowed</label> + <option value="0">0</option> + <option value="1">1</option> + <option value="2">2</option> + </param> + <!-- The numeric option values of the enzyme, instrument, fragment_method and protocol selects below are substituted unchanged into the msgfplus_search.rb command line. --> + <param name="enzyme" type="select" format="text"> + <label>Enzyme</label> + <option value="0">unspecific cleavage</option> + <option value="1">Trypsin</option> + <option value="2">Chymotrypsin</option> + <option value="3">Lys-C</option> + <option value="4">Lys-N</option> + <option value="5">glutamyl endopeptidase</option> + <option value="6">Arg-C</option> + <option value="7">Asp-N</option> + <option value="8">alphaLP</option> + <option 
value="9">no cleavage</option> + </param> + + <param name="instrument" type="select" format="text"> + <label>Instrument Type</label> + <option value="2">TOF</option> + <option value="0">Low-res LCQ/LTQ</option> + <option value="1">High-res LTQ</option> + </param> + + <param name="fragment_method" type="select" format="text"> + <label>Fragmentation Method</label> + <option value="0">Respect Input File</option> + <option value="1">CID</option> + <option value="2">ETD</option> + <option value="3">HCD</option> + <option value="4">Merge spectra from same precursor</option> + </param> + + <param name="protocol" type="select" format="text"> + <label>Protocol</label> + <option value="0">NoProtocol</option> + <option value="1">Phosphorylation</option> + <option value="2">iTRAQ</option> + <option value="3">iTRAQPhospho</option> + </param> + + <param name="fragment_ion_tol" help="Fragment Ion Tolerance in Daltons" type="float" value="0.65" min="0" max="10000" label="Fragment ion tolerance"/> + + <param name="precursor_ion_tol" help="Precursor Ion Tolerance (Da or ppm)" type="float" value="100" min="0" max="10000" label="Precursor ion tolerance"/> + <param name="precursor_tolu" type="select" format="text"> + <label>Precursor Ion Tolerance Units</label> + <option value="ppm">ppm</option> + <option value="Da">Da</option> + </param> + + <param name="isotope_error_range" help="Takes into account the error introduced by choosing a non-monoisotopic peak for fragmentation." 
type="text" size="80" value="0,1" label="Isotope Error Range"/> + <param name="min_pep_len" help="" type="integer" value="6" label="Minimum Peptide Length"/> + <param name="max_pep_len" help="" type="integer" value="40" label="Maximum Peptide Length"/> + <param name="min_pep_charge" help="" type="integer" value="2" label="Minimum Peptide Charge"/> + <param name="max_pep_charge" help="" type="integer" value="3" label="Maximum Peptide Charge"/> + <param name="num_reported_matches" help="Number of matches per spectrum to be reported" type="integer" value="1" label="Num reported matches"/> + <param name="java_mem" help="Increase this value if you get out of memory errors" type="text" size="80" value="3500M" label="Java Memory Limit"/> + <param name="pepxml_output_use" type="boolean" label="Convert results to pepXML" help="" truevalue="true" falsevalue="false" /> + </inputs> + <outputs> + <data format="mzid" name="output" metadata_source="input_file" label="MSGF+_vs_${database.dbkey if $database.has_key('dbkey') else $database.fasta_file.display_name}.${input_file.display_name}"> + <change_format> + <when input="pepxml_output_use" value="true" format="raw_pepxml" metadata_source="input_file" + label="MSGF+_vs_${database.dbkey if $database.has_key('dbkey') else $database.fasta_file.display_name}.${input_file.display_name}"/> + </change_format> + </data> + </outputs> + <tests> + <test> + <param name="source_select" value="input_ref"/> + <param name="fasta_file" value="bsa.fasta"/> + <param name="input_file" value="bsa.mzML"/> + <output name="output" file="bsa.mzid" compare="sim_size" delta="600" /> + </test> + </tests> + <help> + +**What it does** + +Runs an MS/MS database search using the MSGFPlus search engine. Output is an mzIdentML (mzid) file containing identified peptides along with their raw search scores, or a pepXML file when "Convert results to pepXML" is selected. 
+ +---- + +**References** + +Please see http://proteomics.ucsd.edu/Software/MSGFPlus.html for details of the MSGFPlus search engine and references describing its algorithm + + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repository_dependencies.xml Mon Mar 03 19:05:35 2014 -0500 @@ -0,0 +1,4 @@ +<?xml version="1.0"?> +<repositories description="Proteomics datatypes, MSGF+ and Protk"> + <repository changeset_revision="7101f7e4b00b" name="proteomics_datatypes" owner="iracooke" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + </repositories>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bsa.fasta Mon Mar 03 19:05:35 2014 -0500 @@ -0,0 +1,26 @@ +>sp|ALBU_BOVIN| +MKWVTFISLLLLFSSAYSRGVFRRDTHKSEIAHRFKDLGEEHFKGLVLIA +FSQYLQQCPFDEHVKLVNELTEFAKTCVADESHAGCEKSLHTLFGDELCK +VASLRETYGDMADCCEKQEPERNECFLSHKDDSPDLPKLKPDPNTLCDEF +KADEKKFWGKYLYEIARRHPYFYAPELLYYANKYNGVFQECCQAEDKGAC +LLPKIETMREKVLASSARQRLRCASIQKFGERALKAWSVARLSQKFPKAE +FVEVTKLVTDLTKVHKECCHGDLLECADDRADLAKYICDNQDTISSKLKE +CCDKPLLEKSHCIAEVEKDAIPENLPPLTADFAEDKDVCKNYQEAKDAFL +GSFLYEYSRRHPEYAVSVLLRLAKEYEATLEECCAKDDPHACYSTVFDKL +KHLVDEPQNLIKQNCDQFEKLGEYGFQNALIVRYTRKVPQVSTPTLVEVS +RSLGKVGTRCCTKPESERMPCTEDYLSLILNRLCVLHEKTPVSEKVTKCC +TESLVNRRPCFSALTPDETYVPKAFDEKLFTFHADICTLPDTEKQIKKQT +ALVELLKHKPKATEEQLKTVMENFVAFVDKCCAADDKEACFAVEGPKLVV +STQTALA +>sp|AMYS_HUMAN| +MKLFWLLFTIGFCWAQYSSNTQQGRTSIVHLFEWRWVDIALECERYLAPK +GFGGVQVSPPNENVAIHNPFRPWWERYQPVSYKLCTRSGNEDEFRNMVTR +CNNVGVRIYVDAVINHMCGNAVSAGTSSTCGSYFNPGSRDFPAVPYSGWD +FNDGKCKTGSGDIENYNDATQVRDCRLSGLLDLALGKDYVRSKIAEYMNH +LIDIGVAGFRIDASKHMWPGDIKAILDKLHNLNSNWFPEGSKPFIYQEVI +DLGGEPIKSSDYFGNGRVTEFKYGAKLGTVIRKWNGEKMSYLKNWGEGWG +FMPSDRALVFVDNHDNQRGHGAGGASILTFWDARLYKMAVGFMLAHPYGF +TRVMSSYRWPRYFENGKDVNDWVGPPNDNGVTKEVTINPDTTCGNDWVCE +HRWRQIRNMVNFRNVVDGQPFTNWYDNGSNQVAFGRGNRGFIVFNNDDWT +FSLTLQTGLPAGTYCDVISGDKINGNCTGIKIYVSDDGKAHFSISNSAED +PFIAIHAESKL \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bsa.mzML Mon Mar 03 19:05:35 2014 -0500 @@ -0,0 +1,96 @@ +<?xml version="1.0" encoding="ISO-8859-1"?> +<indexedmzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.1_idx.xsd"> + <mzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd" id="bsa" version="1.1.0"> + <cvList count="2"> + <cv id="MS" fullName="Proteomics Standards Initiative Mass Spectrometry Ontology" version="3.30.0" URI="http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo"/> + <cv id="UO" fullName="Unit Ontology" version="12:10:2011" URI="http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo"/> + </cvList> + <fileDescription> + <fileContent> + <cvParam cvRef="MS" accession="MS:1000580" name="MSn spectrum" value=""/> + <cvParam cvRef="MS" accession="MS:1000127" name="centroid spectrum" value=""/> + </fileContent> + <sourceFileList count="1"> + <sourceFile id="bsa.mgf" name="bsa.mgf" location="file:///"> + <cvParam cvRef="MS" accession="MS:1000774" name="multiple peak list nativeID format" value=""/> + <cvParam cvRef="MS" accession="MS:1001062" name="Mascot MGF file" value=""/> + <cvParam cvRef="MS" accession="MS:1000569" name="SHA-1" value="92c6fa5d4ff59e2124950546a651bb9a85bbfd92"/> + </sourceFile> + </sourceFileList> + </fileDescription> + <softwareList count="1"> + <software id="pwiz_3.0.4140" version="3.0.4140"> + <cvParam cvRef="MS" accession="MS:1000615" name="ProteoWizard" value=""/> + </software> + </softwareList> + <instrumentConfigurationList count="1"> + <instrumentConfiguration id="IC"> + <cvParam cvRef="MS" accession="MS:1000031" name="instrument model" value=""/> + 
</instrumentConfiguration> + </instrumentConfigurationList> + <dataProcessingList count="1"> + <dataProcessing id="pwiz_Reader_conversion"> + <processingMethod order="0" softwareRef="pwiz_3.0.4140"> + <cvParam cvRef="MS" accession="MS:1000544" name="Conversion to mzML" value=""/> + </processingMethod> + <processingMethod order="1" softwareRef="pwiz_3.0.4140"> + <userParam name="most intense count (excluding ties at the threshold)" value="100"/> + </processingMethod> + </dataProcessing> + </dataProcessingList> + <run id="bsa" defaultInstrumentConfigurationRef="IC"> + <spectrumList count="1" defaultDataProcessingRef="pwiz_Reader_conversion"> + <spectrum index="0" id="index=0" defaultArrayLength="97" dataProcessingRef="pwiz_Reader_conversion"> + <cvParam cvRef="MS" accession="MS:1000580" name="MSn spectrum" value=""/> + <cvParam cvRef="MS" accession="MS:1000511" name="ms level" value="2"/> + <cvParam cvRef="MS" accession="MS:1000127" name="centroid spectrum" value=""/> + <cvParam cvRef="MS" accession="MS:1000796" name="spectrum title" value="Cmpd 1, +MSn(722.3522), 33.5 min"/> + <cvParam cvRef="MS" accession="MS:1000130" name="positive scan" value=""/> + <cvParam cvRef="MS" accession="MS:1000528" name="lowest observed m/z" value="198.13402"/> + <cvParam cvRef="MS" accession="MS:1000527" name="highest observed m/z" value="1280.62643"/> + <cvParam cvRef="MS" accession="MS:1000285" name="total ion current" value="21166.0"/> + <cvParam cvRef="MS" accession="MS:1000504" name="base peak m/z" value="249.17284"/> + <cvParam cvRef="MS" accession="MS:1000505" name="base peak intensity" value="2164.0"/> + <scanList count="1"> + <cvParam cvRef="MS" accession="MS:1000795" name="no combination" value=""/> + <scan> + </scan> + </scanList> + <precursorList count="1"> + <precursor> + <selectedIonList count="1"> + <selectedIon> + <cvParam cvRef="MS" accession="MS:1000744" name="selected ion m/z" value="722.35225" unitCvRef="MS" unitAccession="MS:1000040" unitName="m/z"/> + <cvParam 
cvRef="MS" accession="MS:1000041" name="charge state" value="2"/> + </selectedIon> + </selectedIonList> + <activation> + </activation> + </precursor> + </precursorList> + <binaryDataArrayList count="2"> + <binaryDataArray encodedLength="1036"> + <cvParam cvRef="MS" accession="MS:1000523" name="64-bit float" value=""/> + <cvParam cvRef="MS" accession="MS:1000576" name="no compression" value=""/> + <cvParam cvRef="MS" accession="MS:1000514" name="m/z array" value="" unitCvRef="MS" unitAccession="MS:1000040" unitName="m/z"/> + <binary>RPrt68CEaUDI7236swRrQLecS3FVwmxAdcjNcANFbUBlwi/182NuQL/xtWeWAm9AGm7A54clb0BNSkG3FyJxQJOMnIU9QXFAFCLgEKpScUCe6pCb4fJyQNbiUwAME3RA529CIQJSdUDmllZDYmJ2QFch5SfVQXdAOiNKe4NRd0A2WaMeolJ4QGwE4nX9YXhAG7tE9VZyeUAaNPRPcNN6QDyInSl0JHtAEqCmli1Te0CuDRXj/JJ7QBsS91h6gnxAet/42jOzfEC94xQdyZJ9QEbT2clgMn5Ah78ma9RCf0CAYI4ev1J/QPCiryCNKoBAqiuf5XkxgEDtuyL4H3qAQDI4Sl7dgYBAEVMiiZ66gEBi+IiY0tGAQDMbZJKR2YBAjL6CNCMKgUA0ETY8vUGBQEymCkYlUoFAzzEge71hgUB8D5cc97WBQKYnLPHAuYFArDlAMEfigUBWSPlJ9fGBQP2H9NsX/oFApMLYQlApgkBmFMstLUKCQG2tLxLaWYJAMPDce3ixgkAYsrrVc7mCQHAlOzbCOYNAPrMkQI1Bg0Ang6PkFWqDQOrPfqRIgoNAdZMYBBaKg0BaL4ZyIrqDQGIQWDm0woNANUHUfcDJg0AMAiuHVgqEQCzUmubdUoRAL6NYbilqhEDXwFYJlpqEQGjon+CiooRA9Pi9TT/ShEBrZcIvNdqEQBqjdVR16oRAs5jYfBzyhEBx5ldzwFGFQIz4Tsx6eoVAhPBo4wiKhUCCHJQwE5KFQCUGgZUD0oVAy9b6IuHZhUBeukkMAmKGQM11Gmkp8oZA1xcJbTnDh0AWwf9WMsuHQJhRLLd0SohAldQJaGJTiEAUyy2thoKIQNuizAbZColAjZyFPa0SiUDLSpNSkGKJQCveyDzymolAS80eaMXqiUB47j1csvKJQONTAIzneopAejarPlfTikAMPPcerlOLQDdPdciNW4tA1EM0usPji0CY3ZOHBauMQD90QX3L845A/+xHigh8j0D7OnDOKPaRQAUXK2oQ+pFAbqMBvCU+kkA=</binary> + </binaryDataArray> + <binaryDataArray encodedLength="520"> + <cvParam cvRef="MS" accession="MS:1000521" name="32-bit float" value=""/> + <cvParam cvRef="MS" accession="MS:1000576" name="no compression" value=""/> + <cvParam cvRef="MS" accession="MS:1000515" name="intensity array" value="" unitCvRef="MS" unitAccession="MS:1000131" unitName="number of counts"/> + 
<binary>AABIQgAAUEIAAMpCAABkQwAA8EEAACBCAEAHRQAAgEIAACRDAICRQwAAhEIAAE9EAADwQQAAjkIAAAhCAAAEQgAAMEIAAChDAACmQgAA4EEAAJBDAADGQgAA1EIAAHhCAAAEQwAAHEMAAKpCAAAcQgAAgUMAALRCAIDuQwAADEIAAJxCAMCERAAAOEIAADlDAAAAQgAA3kIAAPxCAICfQwAASUMAAAtDAADoQQAA00MAgMNDAADoQQAA+UMAAOBBAABUQgAAH0MAAExCAICFQwAA+EEAABxCAACMQgAAIEIAAKhCAIDVQwAAKEMAAFpEAAAIQgAABEIAAOBBAAAMQgAA4EEAAIpCAAA1QwAA+EIAAFBCAADoQQAAEEIAACRCAIC8QwAAFUQAAKRDAAAkQgAAgUMAAAhCAIDFQwAA9kIAAOhBAADEQgAAFEMAAAxCAADUQgAAO0MAAApDAADgQQAA8EEAAANDAEAmRAAACEIAAL5CAAASRAAAAEIAABFDAEBQRA==</binary> + </binaryDataArray> + </binaryDataArrayList> + </spectrum> + </spectrumList> + </run> + </mzML> + <indexList count="2"> + <index name="spectrum"> + <offset idRef="index=0">2685</offset> + </index> + </indexList> + <indexListOffset>7058</indexListOffset> + <fileChecksum>b76171188a63cfad075d3738a172b15f1f9e0c4c</fileChecksum> +</indexedmzML>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bsa.mzid Mon Mar 03 19:05:35 2014 -0500 @@ -0,0 +1,123 @@ +<?xml version="1.0" encoding="UTF-8"?> +<MzIdentML id="MS-GF+" version="1.1.0" xmlns="http://psidev.info/psi/pi/mzIdentML/1.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psidev.info/psi/pi/mzIdentML/1.1 http://www.psidev.info/files/mzIdentML1.1.0.xsd" creationDate="2013-06-05T16:52:19" > +<cvList xmlns="http://psidev.info/psi/pi/mzIdentML/1.1"> + <cv id="PSI-MS" uri="http://psidev.cvs.sourceforge.net/viewvc/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo" version="3.30.0" fullName="PSI-MS"/> + <cv id="UNIMOD" uri="http://www.unimod.org/obo/unimod.obo" fullName="UNIMOD"/> + <cv id="UO" uri="http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo" fullName="UNIT-ONTOLOGY"/> +</cvList> +<AnalysisSoftwareList xmlns="http://psidev.info/psi/pi/mzIdentML/1.1"> + <AnalysisSoftware version="Beta (v9517)" name="MS-GF+" id="ID_software"> + <SoftwareName> + <cvParam accession="MS:1002048" cvRef="PSI-MS" name="MS-GF+"/> + </SoftwareName> + </AnalysisSoftware> +</AnalysisSoftwareList> +<SequenceCollection xmlns="http://psidev.info/psi/pi/mzIdentML/1.1"> + <DBSequence accession="sp|ALBU_BOVIN|" searchDatabase_ref="SearchDB_1" length="607" id="DBSeq1"> + <cvParam accession="MS:1001088" cvRef="PSI-MS" value="sp|ALBU_BOVIN|" name="protein description"/> + </DBSequence> + <Peptide id="Pep1"> + <PeptideSequence>YICDNQDTISSK</PeptideSequence> + <Modification monoisotopicMassDelta="57.021463735" location="3"> + <cvParam accession="UNIMOD:4" cvRef="UNIMOD" name="Carbamidomethyl"/> + </Modification> + </Peptide> + <PeptideEvidence isDecoy="false" post="L" pre="K" end="297" start="286" peptide_ref="Pep1" dBSequence_ref="DBSeq1" id="PepEv_286_1_286"/> +</SequenceCollection> +<AnalysisCollection xmlns="http://psidev.info/psi/pi/mzIdentML/1.1"> + <SpectrumIdentification 
spectrumIdentificationList_ref="SI_LIST_1" spectrumIdentificationProtocol_ref="SearchProtocol_1" id="SpecIdent_1"> + <InputSpectra spectraData_ref="SID_1"/> + <SearchDatabaseRef searchDatabase_ref="SearchDB_1"/> + </SpectrumIdentification> +</AnalysisCollection> +<AnalysisProtocolCollection xmlns="http://psidev.info/psi/pi/mzIdentML/1.1"> + <SpectrumIdentificationProtocol analysisSoftware_ref="ID_software" id="SearchProtocol_1"> + <SearchType> + <cvParam accession="MS:1001083" cvRef="PSI-MS" name="ms-ms search"/> + </SearchType> + <AdditionalSearchParams> + <cvParam accession="MS:1001211" cvRef="PSI-MS" name="parent mass type mono"/> + <cvParam accession="MS:1001256" cvRef="PSI-MS" name="fragment mass type mono"/> + <userParam value="false" name="TargetDecoyApproach"/> + <userParam value="0" name="MinIsotopeError"/> + <userParam value="1" name="MaxIsotopeError"/> + <userParam value="As written in the spectrum or CID if no info" name="FragmentMethod"/> + <userParam value="LowRes" name="Instrument"/> + <userParam value="NoProtocol" name="Protocol"/> + <userParam value="2" name="NumTolerableTermini"/> + <userParam value="1" name="NumMatchesPerSpec"/> + <userParam value="6" name="MinPepLength"/> + <userParam value="40" name="MaxPepLength"/> + <userParam value="2" name="MinCharge"/> + <userParam value="3" name="MaxCharge"/> + </AdditionalSearchParams> + <ModificationParams> + <SearchModification residues="C" massDelta="57.021465" fixedMod="true"> + <cvParam accession="UNIMOD:4" cvRef="UNIMOD" name="Carbamidomethyl"/> + </SearchModification> + </ModificationParams> + <Enzymes> + <Enzyme missedCleavages="1000" semiSpecific="false" id="Tryp"> + <EnzymeName> + <cvParam accession="MS:1001251" cvRef="PSI-MS" name="Trypsin"/> + </EnzymeName> + </Enzyme> + </Enzymes> + <ParentTolerance> + <cvParam accession="MS:1001412" cvRef="PSI-MS" unitCvRef="UO" unitName="parts per million" unitAccession="UO:0000169" value="200.0" name="search tolerance plus value"/> + <cvParam 
accession="MS:1001413" cvRef="PSI-MS" unitCvRef="UO" unitName="parts per million" unitAccession="UO:0000169" value="200.0" name="search tolerance minus value"/> + </ParentTolerance> + <Threshold> + <cvParam accession="MS:1001494" cvRef="PSI-MS" name="no threshold"/> + </Threshold> + </SpectrumIdentificationProtocol> +</AnalysisProtocolCollection> +<DataCollection xmlns="http://psidev.info/psi/pi/mzIdentML/1.1"> + <Inputs> + <SearchDatabase numDatabaseSequences="2" location="/Users/iracooke/Sources/protk-wk/bsa.fasta" id="SearchDB_1"> + <FileFormat> + <cvParam accession="MS:1001348" cvRef="PSI-MS" name="FASTA format"/> + </FileFormat> + <DatabaseName> + <userParam name="bsa.fasta"/> + </DatabaseName> + </SearchDatabase> + <SpectraData location="/Users/iracooke/Sources/protk-wk/bsa.mzML" name="bsa.mzML" id="SID_1"> + <FileFormat> + <cvParam accession="MS:1000584" cvRef="PSI-MS" name="mzML file"/> + </FileFormat> + <SpectrumIDFormat> + <cvParam accession="MS:1000774" cvRef="PSI-MS" name="multiple peak list nativeID format"/> + </SpectrumIDFormat> + </SpectraData> + </Inputs> + <AnalysisData> + <SpectrumIdentificationList id="SI_LIST_1"> + <FragmentationTable> + <Measure id="Measure_MZ"> + <cvParam accession="MS:1001225" cvRef="PSI-MS" unitCvRef="PSI-MS" unitName="m/z" unitAccession="MS:1000040" name="product ion m/z"/> + </Measure> + </FragmentationTable> + <SpectrumIdentificationResult spectraData_ref="SID_1" spectrumID="index=0" id="SIR_1"> + <SpectrumIdentificationItem passThreshold="true" rank="1" peptide_ref="Pep1" calculatedMassToCharge="722.3251953125" experimentalMassToCharge="722.3522338867188" chargeState="2" id="SII_1_1"> + <PeptideEvidenceRef peptideEvidence_ref="PepEv_286_1_286"/> + <cvParam accession="MS:1002049" cvRef="PSI-MS" value="21" name="MS-GF:RawScore"/> + <cvParam accession="MS:1002050" cvRef="PSI-MS" value="71" name="MS-GF:DeNovoScore"/> + <cvParam accession="MS:1002052" cvRef="PSI-MS" value="2.0246382E-9" name="MS-GF:SpecEValue"/> + <cvParam 
accession="MS:1002053" cvRef="PSI-MS" value="2.2635456E-6" name="MS-GF:EValue"/> + <userParam value="0" name="IsotopeError"/> + <userParam value="CID" name="AssumedDissociationMethod"/> + <userParam value="0.5818614" name="ExplainedIonCurrentRatio"/> + <userParam value="0.12558742" name="NTermIonCurrentRatio"/> + <userParam value="0.45627397" name="CTermIonCurrentRatio"/> + <userParam value="18513.0" name="MS2IonCurrent"/> + <userParam value="61.02686" name="MeanErrorAll"/> + <userParam value="64.35494" name="StdevErrorAll"/> + <userParam value="88.56576" name="MeanErrorTop7"/> + <userParam value="86.03032" name="StdevErrorTop7"/> + </SpectrumIdentificationItem> + </SpectrumIdentificationResult> + </SpectrumIdentificationList> + </AnalysisData> +</DataCollection> +</MzIdentML>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/msgfplus_mods.loc.sample Mon Mar 03 19:05:35 2014 -0500 @@ -0,0 +1,50 @@ +#This file lists the names of inbuilt chemical modifications accepted by msgfplus +#Each entry consists of 4 tab separated fields like this +#<Displayed Name> <DBKey> <Modification String> <DBKey> +# +#Modification strings should conform to the standard MSGFPlus syntax with the following exception +#The ModType field is overridden by the msgfplus_search.rb tool. In other words, any of these mods +#may be passed to the tool as a variable or fixed mod and the tool will substitute the appropriate ModType +#value +# +#Standard MSGFPlus syntax is +# +# To input a modification, use the following command: +# Mass or CompositionStr, Residues, ModType, Position, Name (all five fields are required). +# CompositionStr (C[Num]H[Num]N[Num]O[Num]S[Num]P[Num]) +# - C (Carbon), H (Hydrogen), N (Nitrogen), O (Oxygen), S (Sulfur) and P (Phosphorus) are allowed. +# - Atoms can be omitted, but the remaining atoms must keep the order shown above. +# - Negative numbers are allowed. +# - E.g. C2H2O1 (valid), H2C1O1 (invalid) +# Mass can be used instead of CompositionStr. It is important to specify accurate masses (integer masses are insufficient). +# - E.g. 15.994915 +# Residues: affected amino acids (must be upper-case letters) +# - Must be upper-case letters or * +# - Use * if this modification is applicable to any residue. +# - * cannot be used for an "anywhere" modification (e.g. "15.994915, *, opt, any, Oxidation" is not allowed.) +# - E.g. NQ, * +# ModType: "fix" for fixed modifications, "opt" for variable modifications (case insensitive) +# Position: position in the peptide where the modification can be attached. +# - One of the following five values should be used: +# - any (anywhere), N-term (peptide N-term), C-term (peptide C-term), Prot-N-term (protein N-term), Prot-C-term (protein C-term) +# - Case insensitive +# - "-" can be omitted +# - E.g. 
any, Any, Prot-n-Term, ProtNTerm => all valid +# Name: name of the modification (Unimod PSI-MS name) +# - For proper mzIdentML output, this name should be the same as the Unimod PSI-MS name +# - E.g. Phospho, Acetyl +#C2H3N1O1,C,fix,any,Carbamidomethyl # Fixed Carbamidomethyl C +# Variable Modifications (default: none) +#O1,M,opt,any,Oxidation # Oxidation M +#15.994915,M,opt,any,Oxidation # Oxidation M (mass is used instead of CompositionStr) +#H-1N-1O1,NQ,opt,any,Deamidated # Negative numbers are allowed. +#C2H3NO,*,opt,N-term,Carbamidomethyl # Variable Carbamidomethyl N-term +#H-2O-1,E,opt,N-term,Pyro_glu # Pyro-glu from E +#H-3N-1,Q,opt,N-term,Pyro-glu # Pyro-glu from Q +#C2H2O,*,opt,Prot-N-term,Acetyl # Acetylation Protein N-term +#C2H2O1,K,opt,any,Acetyl # Acetylation K +#CH2,K,opt,any,Methyl # Methylation K +#HO3P,STY,opt,any,Phospho # Phosphorylation STY + +Carbamidomethyl C carbamidomethyl_c_ C2H3N1O1,C,opt,any,Carbamidomethyl carbamidomethyl_c_ +Oxidation M oxidation_m_ O1,M,opt,any,Oxidation oxidation_m_ \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/pepxml_databases.loc.sample Mon Mar 03 19:05:35 2014 -0500 @@ -0,0 +1,13 @@ +#This file lists the names of protein databases installed locally in protk. +# These are used by omssa and x!tandem as well as the "mascot to pepxml" tool +# In order to combine search results with Interprophet, searches must be run against an identical database +# +# Entries should be structured as follows +# Display_name omssa_tandem_dbname dbkey +# +# +Swissprot spall_ spall spall_ +Combined PlasmboDB (falciparum) and Swissprot Human plasmodb_pfalciparum_sphuman_ plasmodb_pfalciparum_sphuman plasmodb_pfalciparum_sphuman_ +Swissprot Human sphuman_ sphuman sphuman_ +Combined Swissprot/TRembl Human sptrhuman_ sptrhuman sptrhuman_ +Swissprot Mouse spmouse_ spmouse spmouse_
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Mon Mar 03 19:05:35 2014 -0500 @@ -0,0 +1,12 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="protk" version="1.2.6"> + <repository changeset_revision="bc16eb3ec159" name="package_protk_1_2_6" owner="iracooke" prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + </package> + <package name="proteowizard" version="3_0_4388"> + <repository changeset_revision="a2cbef24e9fd" name="package_proteowizard_3_0_4388" owner="iracooke" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + </package> + <package name="msgfplus" version="20140210"> + <repository changeset_revision="9b21b161ac97" name="package_msgfplus__20140210" owner="iracooke" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>