diff pathwaymatcher.xml @ 4:36cc1538f775 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pathwaymatcher commit 1d08bec4a320f62652392f08c2481bbe215e37fa
author galaxyp
date Sun, 14 Jul 2019 05:09:12 -0400
parents f3be3f08dcfa
children 00ee806dd5ff
line wrap: on
line diff
--- a/pathwaymatcher.xml	Wed May 08 13:40:59 2019 -0400
+++ b/pathwaymatcher.xml	Sun Jul 14 05:09:12 2019 -0400
@@ -4,7 +4,7 @@
     </description>
     <macros>
        <token name="@PATHWAYMATCHER_VERSION@">1.9.1</token>
-       <token name="@TOOL_SUBVERSION@">1</token>
+       <token name="@TOOL_SUBVERSION@">2</token>
        <xml name="input_fasta">
          <param format="fasta" name="input_database" type="data" label="Protein Database"
              help="Select FASTA database from history"/>
@@ -18,6 +18,7 @@
         <exit_code range="1:" level="fatal" description="Job Failed" />
         <regex match="java.*Exception" level="fatal" description="Java Exception"/>
         <regex match="Could not create the Java virtual machine" level="fatal" description="JVM Error"/>
+        <regex match="filename not matched:  reports/proteins_proteoforms.txt" level="fatal" description="PeptideShaker archive does not contain the proteoforms file. It may have been created by a 1.x PeptideShaker version."/>
     </stdio>
     <command>
 <![CDATA[
@@ -33,12 +34,11 @@
         cwd=`pwd`;
         export HOME=\$cwd;
 
-        ## If we use peptideshaker files as inputs, firstly we need to uncompress their proteoforms files.
+        ## When PeptideShaker 2.x archives are used as inputs, first extract the proteoforms report from each archive.
         #for $i, $s in enumerate($match_types)
             #if $s.match_type.match_type_selector == "peptideshakerzip_proteoforms"
-                ##unzip -l $s.match_type.input_peptideshakerzip_proteoforms;
-                unzip -j '${$s.match_type.input_peptideshakerzip_proteoforms}' 'output_reports/proteoforms.txt' -d './';
-                mv proteoforms.txt ps_proteoforms_'${$i}'.txt;
+                unzip -j '${$s.match_type.input_peptideshakerzip_proteoforms}' 'reports/proteins_proteoforms.txt' -d './';
+                mv proteins_proteoforms.txt ps_proteoforms_'${$i}'.txt;
             #end if
         #end for
 
@@ -49,7 +49,6 @@
 
             #for $i, $s in enumerate($match_types)
 
-
                 ## PROTEOFORMS
 
                 #if $s.match_type.match_type_selector == "proteoforms"
@@ -147,15 +146,12 @@
         #if $output_options.output_graphs:
             mkdir "graphs";
         #end if
-
         #if 'gg' in $output_graphs_list:
             mv -t "graphs" "geneExternalEdges.tsv" "geneInternalEdges.tsv" "geneVertices.tsv" ;
         #end if
-
         #if 'gu' in $output_graphs_list:
             mv -t "graphs" "proteinExternalEdges.tsv" "proteinInternalEdges.tsv" "proteinVertices.tsv";
         #end if
-
         #if 'gp' in $output_graphs_list:
             mv -t "graphs" "proteoformExternalEdges.tsv" "proteoformInternalEdges.tsv" "proteoformVertices.tsv";
         #end if
@@ -172,7 +168,7 @@
                   <param name="match_type_selector" type="select" label="Match type"
                       help="">
                       <option value="proteoforms">Proteoforms</option>
-                      <option value="peptideshakerzip_proteoforms">Proteoforms from Peptideshaker Archive</option>
+                      <option value="peptideshakerzip_proteoforms">Proteoforms from Peptideshaker 2.x Archive</option>
                       <option value="gene">Genes</option>
                       <option value="uniprot">Proteins - UniProt Accession list</option>
                       <option value="ensembl">Proteins - Ensembl identifier list</option>
@@ -181,7 +177,6 @@
                       <option value="rsid">Genetic variants - SNP rsId list</option>
                       <option value="peptide">Peptides - Simple list</option>
                       <option value="modifiedpeptide">Peptides - Peptide List with PTM types and sites</option>
-
                   </param>
 
 
@@ -202,6 +197,7 @@
                           <option value="SUBSET_NO_TYPES">SUBSET NO TYPES</option>
                           <option value="ONE">ONE</option>
                           <option value="ONE_NO_TYPES">ONE_NO_TYPES</option>
+                          <option value="ACCESSION">ACCESSION</option>
                       </param>
 
                       <param name="proteoform_range" type="integer" value="0" label="Integer range of error for PTM sites" optional="true"
@@ -209,7 +205,7 @@
                   </when>
 
                   <when value="peptideshakerzip_proteoforms">
-                      <param format="zip" name="input_peptideshakerzip_proteoforms" type="data" label="Proteoforms from Peptideshaker Archive"
+                      <param format="zip" name="input_peptideshakerzip_proteoforms" type="data" label="Proteoforms from Peptideshaker 2.x Archive"
                           help="A proteoform defines a specific state of a protein.
                           It is composed by the protein UniProt accession, isoform and set of post translational modifications.
                           The input file contains one line for each proteoform. Each PTM is specified using a modification
@@ -224,6 +220,7 @@
                           <option value="SUBSET_NO_TYPES">SUBSET NO TYPES</option>
                           <option value="ONE">ONE</option>
                           <option value="ONE_NO_TYPES">ONE_NO_TYPES</option>
+                          <option value="ACCESSION">ACCESSION</option>
                       </param>
 
                       <param name="proteoform_peptideshakerzip_range" type="integer" value="0" label="Integer range of error for PTM sites" optional="true"
@@ -295,14 +292,13 @@
                           <option value="SUBSET_NO_TYPES">SUBSET NO TYPES</option>
                           <option value="ONE">ONE</option>
                           <option value="ONE_NO_TYPES">ONE_NO_TYPES</option>
+                          <option value="ACCESSION">ACCESSION</option>
                       </param>
 
                       <param name="modifiedpeptide_ptm_range" type="integer" value="0" label="PTM position range" optional="true"
                           help="Integer number margin error for sites of PTMs. Only for modified peptides."/>
                   </when>
 
-
-
             </conditional>
 
         </repeat>
@@ -345,7 +341,15 @@
                   <param name="input_gene" value="genes.txt" ftype="txt" />
               </conditional>
           </repeat>
-          <output name="search" file="genes_search.tsv" ftype="tsv" compare="sim_size" delta="3000" />
+          <output name="search" ftype="tsv" >
+              <assert_contents>
+                  <has_line_matching expression="CFTR\tP13569\tR-HSA-383190\tHCO3- transport through ion channel\tR-HSA-382556\tABC-family proteins mediated transport\tR-HSA-382551\tTransport of small molecules"/>
+                  <has_line_matching expression="TGFB1\tP01137\tR-HSA-170850\tPhosphorylated SMAD2/3 dissociates from TGFBR\tR-HSA-170834\tSignaling by TGF-beta Receptor Complex\tR-HSA-162582\tSignal Transduction"/>
+                  <has_line_matching expression="SCNN1B\tP51168\tR-HSA-2682349\tRAF1:SGK:TSC22D3:WPP ubiquitinates SCNN channels\tR-HSA-382551\tTransport of small molecules\tR-HSA-382551\tTransport of small molecules"/>
+                  <has_line_matching expression="TNFRSF1A\tP19438\tR-HSA-5626988\tTNF-alpha:TNFR1 binds NSMAF\tR-HSA-75893\tTNF signaling\tR-HSA-162582\tSignal Transduction"/>
+                  <has_n_columns n="8" />
+              </assert_contents>
+          </output>
       </test>
 
       <!-- Test graphs from proteoforms -->
@@ -359,15 +363,82 @@
           </repeat>
           <param name="output_graphs" value="gg,gu,gp" />
           <output_collection name="graphs_files" type="list">
-<!--              <element name="geneExternalEdges" ftype="tsv" file="proteoforms_graphs/geneExternalEdges.tsv" compare="sim_size" delta="1000" /> -->
-              <element name="geneInternalEdges" ftype="tsv" file="proteoforms_graphs/geneInternalEdges.tsv" compare="sim_size" delta="1000"/>
-              <element name="geneVertices" ftype="tsv" file="proteoforms_graphs/geneVertices.tsv" compare="sim_size" delta="1000"/>
-              <element name="proteinExternalEdges" ftype="tsv" file="proteoforms_graphs/proteinExternalEdges.tsv" compare="sim_size" delta="10000"/>
-              <element name="proteinInternalEdges" ftype="tsv" file="proteoforms_graphs/proteinInternalEdges.tsv" compare="sim_size" delta="1000"/>
-              <element name="proteinVertices" ftype="tsv" file="proteoforms_graphs/proteinVertices.tsv" compare="sim_size" delta="1000"/>
-              <element name="proteoformExternalEdges" ftype="tsv" file="proteoforms_graphs/proteoformExternalEdges.tsv" compare="sim_size" delta="1000"/>
-              <element name="proteoformInternalEdges" ftype="tsv" file="proteoforms_graphs/proteoformInternalEdges.tsv" compare="sim_size" delta="1000"/>
-              <element name="proteoformVertices" ftype="tsv" file="proteoforms_graphs/proteoformVertices.tsv" compare="sim_size" delta="1000"/>
+              <element name="geneExternalEdges" ftype="tsv">
+                  <assert_contents>
+                      <has_line_matching expression="ERBB4\tSRC\tReaction\tR-HSA-1963586\tcatalyst\tcatalyst"/>
+                      <has_line_matching expression="RNF85\tSRC1\tReaction\tR-HSA-2316434\tcatalyst\tcatalyst"/>
+                      <has_line_matching expression="CBL2\tUNQ2500/PRO5800\tReaction\tR-HSA-5654677\tinput\tinput"/>
+                      <has_line_matching expression="FN1\tPRKM3\tReaction\tR-HSA-5672973\tcatalyst\toutput"/>
+                      <has_line_matching expression="MAPK11\tMAPK12\tSet\tR-HSA-448855\tmember/candidate\tmember/candidate"/>
+                      <has_line_matching expression="SAPK2B\tSAPK3\tSet\tR-HSA-448855\tmember/candidate\tmember/candidate"/>
+                      <has_n_columns n="6" />
+                  </assert_contents>
+              </element>
+              <element name="geneInternalEdges" ftype="tsv" file="proteoforms_graphs/geneInternalEdges.tsv" compare="sim_size" delta="10"/>
+              <element name="geneVertices" ftype="tsv">
+                  <assert_contents>
+                      <has_line_matching expression="ERK1\tMitogen-activated protein kinase 3 shortName:MAP kinase 3 shortName:MAPK 3 ecNumber2.7.11.24/ecNumber"/>
+                      <has_line_matching expression="CREB1\tCyclic AMP-responsive element-binding protein 1 shortName:CREB-1 shortName:cAMP-responsive element-binding protein 1 ]"/>
+                      <has_line_matching expression="SAPK2\tMitogen-activated protein kinase 11 shortName:MAP kinase 11 shortName:MAPK 11 ecNumber2.7.11.24/ecNumber"/>
+                      <has_line_matching expression="STAT5\tSignal transducer and activator of transcription 5A ]"/>
+                      <has_n_columns n="2" />
+                  </assert_contents>
+              </element>
+              <element name="proteinExternalEdges" ftype="tsv">
+                  <assert_contents>
+                      <has_line_matching expression="P12931\tP56975\tReaction\tR-HSA-1963586\tcatalyst\toutput"/>
+                      <has_line_matching expression="P12931\tP51617\tReaction\tR-HSA-2316434\tcatalyst\tregulator"/>
+                      <has_line_matching expression="P12931\tQ8N302\tReaction\tR-HSA-6802933\tcatalyst\tinput"/>
+                      <has_line_matching expression="O95352\tP12931\tComplex\tR-HSA-6802695\tcomponent\tcomponent"/>
+                      <has_line_matching expression="Q15759\tQ16539\tSet\tR-HSA-198703\tmember/candidate\tmember/candidate"/>
+                      <has_n_columns n="6" />
+                  </assert_contents>
+              </element>
+              <element name="proteinInternalEdges" ftype="tsv">
+                  <assert_contents>
+                      <has_line_matching expression="P00519\tP12931\tReaction\tR-HSA-8942607\tcatalyst\tcatalyst"/>
+                      <has_line_matching expression="P12931\tQ9UQC2\tReaction\tR-HSA-205234\toutput\toutput"/>
+                      <has_line_matching expression="P12931\tP27361\tReaction\tR-HSA-6802933\tinput\toutput"/>
+                      <has_line_matching expression="P12931\tP27361\tReaction\tR-HSA-6802910\tinput\tinput"/>
+                      <has_line_matching expression="P00519\tP12931\tSet\tR-HSA-8942611\tmember/candidate\tmember/candidate"/>
+                      <has_line_matching expression="P12931\tP42229\tComplex\tR-HSA-1469999\tcomponent\tcomponent"/>
+                      <has_n_columns n="6" />
+                  </assert_contents>
+              </element>
+              <element name="proteinVertices" ftype="tsv">
+                  <assert_contents>
+                      <has_line_matching expression="P16220\tCyclic AMP-responsive element-binding protein 1 shortName:CREB-1 shortName:cAMP-responsive element-binding protein 1 ]"/>
+                      <has_line_matching expression="P27361\tMitogen-activated protein kinase 3 shortName:MAP kinase 3 shortName:MAPK 3 ecNumber2.7.11.24/ecNumber"/>
+                      <has_line_matching expression="P42229\tSignal transducer and activator of transcription 5A ]"/>
+                      <has_n_columns n="2" />
+                  </assert_contents>
+              </element>
+              <element name="proteoformExternalEdges" ftype="tsv">
+                  <assert_contents>
+                      <has_line_matching expression="O43597;00048:55,00048:227\tP22681;\tComplex\tR-HSA-934576\tcomponent\tcomponent"/>
+                      <has_line_matching expression="P00533;00048:992,00048:1045,00048:1068,00048:1086,00048:1148,00048:1173\tP22681;\tComplex\tR-HSA-182935\tcomponent\tcomponent"/>
+                      <has_line_matching expression="P27361;00047:202,00048:204\tP28482;00047:185,00048:187\tSet\tR-HSA-450307\tmember/candidate\tmember/candidate"/>
+                      <has_line_matching expression="P12931;00048:419\tQ01196;\tComplex\tR-HSA-8937687\tcomponent\tcomponent"/>
+                      <has_line_matching expression="P15509;\tP42229;00048:694\tSet\tR-HSA-913465\tmember/candidate\tmember/candidate"/>
+                      <has_n_columns n="6" />
+                  </assert_contents>
+              </element>
+              <element name="proteoformInternalEdges" ftype="tsv">
+                  <assert_contents>
+                      <has_line_matching expression="P27361;00047:202,00048:204\tQ15759;00047:180,00048:182\tSet\tR-HSA-450307\tmember/candidate\tmember/candidate"/>
+                      <has_line_matching expression="P27361;00047:202,00048:204\tQ15759;00047:180,00048:182\tSet\tR-HSA-450307\tmember/candidate\tmember/candidate"/>
+                      <has_n_columns n="6" />
+                  </assert_contents>
+              </element>
+              <element name="proteoformVertices" ftype="tsv">
+                  <assert_contents>
+                      <has_line_matching expression="P16220;00046:133\tCyclic AMP-responsive element-binding protein 1 shortName:CREB-1 shortName:cAMP-responsive element-binding protein 1 ]"/>
+                      <has_line_matching expression="Q15759;00047:180,00048:182\tMitogen-activated protein kinase 11 shortName:MAP kinase 11 shortName:MAPK 11 ecNumber2.7.11.24/ecNumber"/>
+                      <has_line_matching expression="P42229;00048:694\tSignal transducer and activator of transcription 5A ]"/>
+                      <has_n_columns n="2" />
+                  </assert_contents>
+              </element>
+
           </output_collection>
       </test>
 
@@ -418,13 +489,44 @@
 
 **Try it now**
 
-You can easily test PathwayMatcher functionality using the example files we provide with proteoforms and proteins information of Cystic Fibrosis:
+You can easily test PathwayMatcher by creating text files from the example data below, which contains proteoform and protein information for Cystic Fibrosis:
 
-https://media.githubusercontent.com/media/PathwayAnalysisPlatform/PathwayMatcher/master/src/test/resources/Proteoforms/Simple/CysticFibrosis.txt
+**Proteoforms**:
+
+::
 
-https://media.githubusercontent.com/media/PathwayAnalysisPlatform/PathwayMatcher/master/src/test/resources/Proteins/UniProt/CysticFibrosis.txt
+ P01137;
+ P10145;
+ P12318;
+ P12318;00048:288,00048:304
+ P13569;
+ P13569;01148:null
+ P19438;
+ P37088;
+ P37088;01148:null
+ P51168;
+ P51170;
+ P51170;01148:null
+ Q14CN2;
+ Q16623;
+ Q9UJW0;
 
-You can upload them to Galaxy by directly copying and pasting their URL into the Galaxy upload dialog (the button with the arrow pointing up in the top-left area, and then choosing *Pasta/Fetch data*).
+**Proteins**:
+::
+
+  P13569
+  P01137
+  P12318
+  Q9UJW0
+  P51168
+  P51170
+  P37088
+  P19438
+  Q14CN2
+  Q16623
+  P10145
+
+After copying and pasting this data into new text files, you can upload them to Galaxy using the upload dialog (click the button with the upward-pointing arrow in the top-left area, then select *Choose local file*).
 
 Once they appear in green in your history, they have been uploaded and you can use them as inputs in PathwayMatcher.
 
@@ -462,9 +564,9 @@
 http://www.nature.com/nbt/journal/v26/n8/full/nbt0808-864.html
 
 .. _dbSNP: https://www.ncbi.nlm.nih.gov/projects/SNP/
-.. _PathwayMatcher: https://github.com/LuisFranciscoHS/PathwayMatcher
-.. _Input: https://github.com/LuisFranciscoHS/PathwayMatcher/wiki/Input
-.. _Output: https://github.com/LuisFranciscoHS/PathwayMatcher/wiki/Output
+.. _PathwayMatcher: https://github.com/PathwayAnalysisPlatform/PathwayMatcher
+.. _Input: https://github.com/PathwayAnalysisPlatform/PathwayMatcher/wiki/Input
+.. _Output: https://github.com/PathwayAnalysisPlatform/PathwayMatcher/wiki/Output
 
     </help>