Mercurial > repos > jjohnson > msfragger
comparison msfragger.xml @ 0:9cbe178a3e3c draft
Uploaded
author | jjohnson |
---|---|
date | Tue, 29 Aug 2017 14:30:36 -0400 |
parents | |
children | a2e41fca9e39 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9cbe178a3e3c |
---|---|
1 <tool id="msfragger" name="MSFragger" version="1.0.0"> | |
2 <description>search for peptide identifications in mass spectrometry proteomics</description> | |
3 <command> | |
4 <![CDATA[ | |
5 #import re | |
6 ## Is file naming going to be a problem? The single input is linked as PREFIX_rep1.EXT, or under its sanitized history name when no prefix is given | |
7 #set $param_file = 'fragger.params' | |
8 cat '$fragger_params' > '$param_file' | |
9 ## num_threads is appended at run time; falls back to 1 when GALAXY_SLOTS is unset | |
10 && echo " " >> '$param_file' | |
11 && echo "num_threads = \${GALAXY_SLOTS:-1}" >> '$param_file' | |
12 && cat '$param_file' > '$output_params' | |
13 ## the search database is referenced via database_name in the params file; no developer-local copy is made here | |
14 #if $input.extension == 'mzml': | |
15 #set $file_ext = 'mzML' | |
16 #elif $input.extension == 'mzxml': | |
17 #set $file_ext = 'mzXML' | |
18 #elif $input.extension == 'mgf': | |
19 #set $file_ext = 'mgf' | |
20 #end if | |
21 #if $input_prefix and len($input_prefix.strip()) > 0: | |
22 #set $input_path = str($input_prefix) + "_rep1" + "." + $file_ext | |
23 #else: | |
24 #set $input_path = $re.sub('\.[mM]\w+$','',$re.sub('[^-a-zA-Z0-9_.]','_',$input.name)) + "." + $file_ext | |
25 #end if | |
26 && ln -s '${input}' '$input_path' | |
27 && java -Xmx8G -jar '$__tool_directory__/MSFragger.jar' '$param_file' '$input_path' | |
28 ## && cat *.log | |
29 >> "$logfile" | |
30 #if $output_format == 'pepXML': | |
31 && cat *.pep.xml > '$output_pepxml' | |
32 #else | |
33 && cat *.tsv > '$output_tsv' | |
34 #end if | |
35 ]]> | |
36 </command> | |
37 <configfiles> | |
38 <configfile name="fragger_params"><![CDATA[#slurp | |
39 #import re | |
40 database_name = $database ## Uniprot.20151009.Hs.revDecoys.fa | |
41 ## num_threads = 0 ## 0=poll CPU to set num threads; else specify num threads directly (max 64) | |
42 | |
43 ## Search Tolerances | |
44 | |
45 precursor_mass_tolerance = #if str( $search_tolerances.mass.precursor_mass_tolerance ) then $search_tolerances.mass.precursor_mass_tolerance else 500.00# | |
46 precursor_mass_units = #if $search_tolerances.mass.units == 'Da' then 0 else 1# ## 0=Daltons, 1=ppm | |
47 precursor_true_tolerance = #if str( $search_tolerances.mass.precursor_true_tolerance ) then $search_tolerances.mass.precursor_true_tolerance else 20.00# | |
48 precursor_true_units = #if $search_tolerances.mass.units == 'Da' then 0 else 1# ## 0=Daltons, 1=ppm | |
49 fragment_mass_tolerance = #if str( $search_tolerances.mass.fragment_mass_tolerance ) then $search_tolerances.mass.fragment_mass_tolerance else 20.00# | |
50 fragment_mass_units = #if $search_tolerances.mass.units == 'Da' then 0 else 1# ## 0=Daltons, 1=ppm | |
51 isotope_error = $search_tolerances.isotope_error ## 0=off, 0/1/2 (standard C13 error) | |
52 | |
53 ## digest parameters (optional numeric fields are tested with str() so an explicit 0 is kept and only an empty field falls back to the default) | |
54 | |
55 search_enzyme_name = #if $digestion.search_enzyme_name then $digestion.search_enzyme_name else 'Trypsin'# ## Trypsin | |
56 search_enzyme_cutafter = #if $digestion.search_enzyme_cutafter then $digestion.search_enzyme_cutafter else 'KR'# ##KR | |
57 search_enzyme_butnotafter = #if $digestion.search_enzyme_butnotafter then $digestion.search_enzyme_butnotafter else 'P'# ##P | |
58 | |
59 num_enzyme_termini = $digestion.num_enzyme_termini ## 2 for enzymatic, 1 for semi-enzymatic, 0 for nonspecific digestion | |
60 allowed_missed_cleavage = #if str( $digestion.allowed_missed_cleavage ) then $digestion.allowed_missed_cleavage else 1# ## maximum value is 5 | |
61 | |
62 digest_min_length = #if str( $digestion.digest_min_length ) then $digestion.digest_min_length else 7# | |
63 digest_max_length = #if str( $digestion.digest_max_length ) then $digestion.digest_max_length else 50# | |
64 digest_mass_range = #if str( $digestion.digest_mass_range_min ) then $digestion.digest_mass_range_min else 500.0# #if str( $digestion.digest_mass_range_max ) then $digestion.digest_mass_range_max else 5000.0# ## MH+ peptide mass range to analyze | |
65 | |
66 | |
67 ## Variable Modification Parameters | |
68 | |
69 clip_nTerm_M = $variable_modification.clip_nTerm_M | |
70 | |
71 ##maximum of 7 mods - amino acid codes, * for any amino acid, [ and ] specifies protein termini, n and c specifies peptide termini | |
72 ##variable_mod_01 = 15.9949 M | |
73 ##variable_mod_02 = 42.0106 [* | |
74 ##variable_mod_03 = 79.96633 STY | |
75 ##variable_mod_03 = -17.0265 nQnC | |
76 ##variable_mod_04 = -18.0106 nE | |
77 | |
78 allow_multiple_variable_mods_on_residue = $variable_modification.allow_multiple_variable_mods_on_residue ## static mods are not considered | |
79 max_variable_mods_per_mod = #if str( $variable_modification.max_variable_mods_per_mod ) then $variable_modification.max_variable_mods_per_mod else 3# ## maximum of 5 | |
80 max_variable_mods_combinations = #if str( $variable_modification.max_variable_mods_combinations ) then $variable_modification.max_variable_mods_combinations else 1000# ## maximum of 65534, limits number of modified peptides generated from sequence | |
81 | |
82 #set $vmods = $re.findall('([+-]?\d+[.]\d+\s\S+)',str($variable_modification.variable_mods)) | |
83 #for i,vmod in enumerate($vmods): | |
84 variable_mod_0${int(i)+1} = $vmod | |
85 #end for | |
86 | |
87 ##open search parameters | |
88 track_zero_topN = #if str( $open_search.track_zero_topN ) then $open_search.track_zero_topN else 0# ## in addition to topN results, keep track of top results in zero bin | |
89 zero_bin_accept_expect = #if str( $open_search.zero_bin_accept_expect ) then $open_search.zero_bin_accept_expect else 0# ## boost top zero bin entry to top if it has expect under 0.01 - set to 0 to disable | |
90 zero_bin_mult_expect = #if str( $open_search.zero_bin_mult_expect ) then $open_search.zero_bin_mult_expect else 1# ## disabled if above passes - multiply expect of zero bin for ordering purposes (does not affect reported expect) | |
91 add_topN_complementary = #if str( $open_search.add_topN_complementary ) then $open_search.add_topN_complementary else 0# | |
92 | |
93 ## spectral processing | |
94 | |
95 minimum_peaks = #if str( $spectrum_processing.minimum_peaks ) then $spectrum_processing.minimum_peaks else 15# ## required minimum number of peaks in spectrum to search (default 10) | |
96 use_topN_peaks = #if str( $spectrum_processing.use_topN_peaks ) then $spectrum_processing.use_topN_peaks else 100# | |
97 | |
98 | |
99 minimum_ratio = #if str($spectrum_processing.minimum_ratio) then $spectrum_processing.minimum_ratio else 0.01# ## filter peaks below this fraction of strongest peak | |
100 | |
101 override_charge = $spectrum_processing.precursor.override_charge ## 0=use precursor charge, 1=ignore precursor charge and use the precursor_charge range below | |
102 #if str( $spectrum_processing.precursor.override_charge ) == '1' | |
103 precursor_charge = $spectrum_processing.precursor.precursor_charge_min $spectrum_processing.precursor.precursor_charge_max ## precursor charge range to analyze; does not override any existing charge; 0 as 1st entry ignores parameter | |
104 #end if | |
105 max_fragment_charge = #if str( $spectrum_processing.max_fragment_charge ) then $spectrum_processing.max_fragment_charge else 2# ## set maximum fragment charge state to analyze (allowed max 5) | |
106 #if str( $spectrum_processing.clear.clear_mz_range_min ) or str( $spectrum_processing.clear.clear_mz_range_max ) | |
107 clear_mz_range = #if str( $spectrum_processing.clear.clear_mz_range_min ) then $spectrum_processing.clear.clear_mz_range_min else 0.0# #if str( $spectrum_processing.clear.clear_mz_range_max ) then $spectrum_processing.clear.clear_mz_range_max else 0.0# | |
108 #else | |
109 clear_mz_range = 0.0 0.0 ## for iTRAQ/TMT type data; will clear out all peaks in the specified m/z range | |
110 #end if | |
111 | |
112 | |
113 min_fragments_modelling = #if str( $modeling_output.min_fragments_modelling ) then $modeling_output.min_fragments_modelling else 3# | |
114 min_matched_fragments = #if str( $modeling_output.min_matched_fragments ) then $modeling_output.min_matched_fragments else 6# | |
115 output_report_topN = #if str( $modeling_output.output_report_topN ) then $modeling_output.output_report_topN else 1# ## falls back to 1 when the field is left empty | |
116 output_max_expect = #if str( $modeling_output.output_max_expect ) then $modeling_output.output_max_expect else 50.0# | |
117 | |
118 output_file_extension = #if $output_format == 'pepXML' then 'pep.xml' else 'tsv'# ##pepXML | |
119 output_format = $output_format ##pepXML or tsv | |
120 | |
121 ## additional modifications | |
122 ## static_modification | |
123 ## NOTE(review): apparently the sed expression used to generate the add_* lines below; kept only as a comment so it is neither compiled nor written to the params file: s/^\(add_[^ ]*\) = \([0-9.]*\)/\1 = #if str( $static_modification.\1 ) then $static_modification.\1 else \2#/ | |
124 | |
125 add_Cterm_peptide = #if str( $static_modification.add_Cterm_peptide ) then $static_modification.add_Cterm_peptide else 0.0# | |
126 add_Nterm_peptide = #if str( $static_modification.add_Nterm_peptide ) then $static_modification.add_Nterm_peptide else 0.0# | |
127 add_Cterm_protein = #if str( $static_modification.add_Cterm_protein ) then $static_modification.add_Cterm_protein else 0.0# | |
128 add_Nterm_protein = #if str( $static_modification.add_Nterm_protein ) then $static_modification.add_Nterm_protein else 0.0# | |
129 add_G_glycine = #if str( $static_modification.add_G_glycine ) then $static_modification.add_G_glycine else 0.0000# ## added to G - avg. 57.0513, mono. 57.02146 | |
130 add_A_alanine = #if str( $static_modification.add_A_alanine ) then $static_modification.add_A_alanine else 0.0000# ## added to A - avg. 71.0779, mono. 71.03711 | |
131 add_S_serine = #if str( $static_modification.add_S_serine ) then $static_modification.add_S_serine else 0.0000# ## added to S - avg. 87.0773, mono. 87.03203 | |
132 add_P_proline = #if str( $static_modification.add_P_proline ) then $static_modification.add_P_proline else 0.0000# ## added to P - avg. 97.1152, mono. 97.05276 | |
133 add_V_valine = #if str( $static_modification.add_V_valine ) then $static_modification.add_V_valine else 0.0000# ## added to V - avg. 99.1311, mono. 99.06841 | |
134 add_T_threonine = #if str( $static_modification.add_T_threonine ) then $static_modification.add_T_threonine else 0.0000# ## added to T - avg. 101.1038, mono. 101.04768 | |
135 add_C_cysteine = #if str( $static_modification.add_C_cysteine ) then $static_modification.add_C_cysteine else 0.0000# ## added to C - avg. 103.1429, mono. 103.00918 | |
136 add_L_leucine = #if str( $static_modification.add_L_leucine ) then $static_modification.add_L_leucine else 0.0000# ## added to L - avg. 113.1576, mono. 113.08406 | |
137 add_I_isoleucine = #if str( $static_modification.add_I_isoleucine ) then $static_modification.add_I_isoleucine else 0.0000# ## added to I - avg. 113.1576, mono. 113.08406 | |
138 add_N_asparagine = #if str( $static_modification.add_N_asparagine ) then $static_modification.add_N_asparagine else 0.0000# ## added to N - avg. 114.1026, mono. 114.04293 | |
139 add_D_aspartic_acid = #if str( $static_modification.add_D_aspartic_acid ) then $static_modification.add_D_aspartic_acid else 0.0000# ## added to D - avg. 115.0874, mono. 115.02694 | |
140 add_Q_glutamine = #if str( $static_modification.add_Q_glutamine ) then $static_modification.add_Q_glutamine else 0.0000# ## added to Q - avg. 128.1292, mono. 128.05858 | |
141 add_K_lysine = #if str( $static_modification.add_K_lysine ) then $static_modification.add_K_lysine else 0.0000# ## added to K - avg. 128.1723, mono. 128.09496 | |
142 add_E_glutamic_acid = #if str( $static_modification.add_E_glutamic_acid ) then $static_modification.add_E_glutamic_acid else 0.0000# ## added to E - avg. 129.1140, mono. 129.04259 | |
143 add_M_methionine = #if str( $static_modification.add_M_methionine ) then $static_modification.add_M_methionine else 0.0000# ## added to M - avg. 131.1961, mono. 131.04048 | |
144 add_H_histidine = #if str( $static_modification.add_H_histidine ) then $static_modification.add_H_histidine else 0.0000# ## added to H - avg. 137.1393, mono. 137.05891 | |
145 add_F_phenylalanine = #if str( $static_modification.add_F_phenylalanine ) then $static_modification.add_F_phenylalanine else 0.0000# ## added to F - avg. 147.1739, mono. 147.06841 | |
146 add_R_arginine = #if str( $static_modification.add_R_arginine ) then $static_modification.add_R_arginine else 0.0000# ## added to R - avg. 156.1857, mono. 156.10111 | |
147 add_Y_tyrosine = #if str( $static_modification.add_Y_tyrosine ) then $static_modification.add_Y_tyrosine else 0.0000# ## added to Y - avg. 163.0633, mono. 163.06333 | |
148 add_W_tryptophan = #if str( $static_modification.add_W_tryptophan ) then $static_modification.add_W_tryptophan else 0.0000# ## added to W - avg. 186.0793, mono. 186.07931 | |
149 #* | |
150 add_B_user_amino_acid = #if str( $static_modification.add_B_user_amino_acid ) then $static_modification.add_B_user_amino_acid else 0.0000# ## added to B - avg. 0.0000, mono. 0.00000 | |
151 add_J_user_amino_acid = #if str( $static_modification.add_J_user_amino_acid ) then $static_modification.add_J_user_amino_acid else 0.0000# ## added to J - avg. 0.0000, mono. 0.00000 | |
152 add_O_user_amino_acid = #if str( $static_modification.add_O_user_amino_acid ) then $static_modification.add_O_user_amino_acid else 0.0000# ## added to O - avg. 0.0000, mono 0.00000 | |
153 add_U_user_amino_acid = #if str( $static_modification.add_U_user_amino_acid ) then $static_modification.add_U_user_amino_acid else 0.0000# ## added to U - avg. 0.0000, mono. 0.00000 | |
154 add_X_user_amino_acid = #if str( $static_modification.add_X_user_amino_acid ) then $static_modification.add_X_user_amino_acid else 0.0000# ## added to X - avg. 0.0000, mono. 0.00000 | |
155 add_Z_user_amino_acid = #if str( $static_modification.add_Z_user_amino_acid ) then $static_modification.add_Z_user_amino_acid else 0.0000# ## added to Z - avg. 0.0000, mono. 0.00000 | |
156 *# | |
157 #slurp]]> | |
158 </configfile> | |
159 </configfiles> | |
160 | |
161 <inputs> | |
162 <param name="input" type="data" format="mzml,mzxml" label="Proteomics Spectrum files in mzML or mzXML format"/> | |
163 <param name="input_prefix" type="text" value="" optional="true" label="File name prefix" help="Names inputs: prefix_rep#.mzXML Leave blank to use History names of inputs"> | |
164 <validator type="regex" message="">[a-zA-Z][a-zA-Z0-9_-]*</validator> | |
165 </param> | |
166 <param name="database" type="data" format="fasta" label="Proteomics Search Database in FASTA format"/> | |
167 <section name="search_tolerances" expanded="false" title="Search Tolerances"> | |
168 <conditional name="mass"> | |
169 <param name="units" type="select" label="Set Mass tolerances" help="Sets default parameters"> | |
170 <option value="Da">Daltons</option> | |
171 <option value="ppm">ppm</option> | |
172 </param> | |
173 <when value="Da"> | |
174 <!-- Need to get correct defaults for Daltons --> | |
175 <param name="precursor_mass_tolerance" type="float" value="20" min="1" max="100" optional="true" label="Precursor mass tolerance" | |
176 help="Precursor mass tolerance (window is +/- this value) Default: 20"/> | |
177 <param name="precursor_true_tolerance" type="float" value="0" min="0" max="100" optional="true" label="Precursor true tolerance" | |
178 help="True precursor mass tolerance (window is +/- this value). Used for tie breaker of results (in spectrally ambiguous cases) and zero bin boosting in open searches (0 disables these features). This option is STRONGLY recommended for open searches. Default: 0"/> | |
179 <param name="fragment_mass_tolerance" type="float" value="20" min="1" max="100" optional="true" label="Fragment mass tolerance" | |
180 help="Fragment mass tolerance (window is +/- this value) Default: 20"/> | |
181 </when> | |
182 <when value="ppm"> | |
183 <!-- Should these be integer for ppm? --> | |
184 <param name="precursor_mass_tolerance" type="float" value="20" min="1" max="100" optional="true" label="Precursor mass tolerance" | |
185 help="Precursor mass tolerance (window is +/- this value) Default: 20"/> | |
186 <param name="precursor_true_tolerance" type="float" value="0.0" min="0.0" max="100" optional="true" label="Precursor true tolerance" | |
187 help="True precursor mass tolerance (window is +/- this value). Used for tie breaker of results (in spectrally ambiguous cases) and zero bin boosting in open searches (0 disables these features). This option is STRONGLY recommended for open searches. Default: 0"/> | |
188 <param name="fragment_mass_tolerance" type="float" value="20" min="1" max="100" optional="true" label="Fragment mass tolerance" | |
189 help="Fragment mass tolerance (window is +/- this value) Default: 20"/> | |
190 </when> | |
191 </conditional> | |
192 <param name="isotope_error" type="select" label="isotope_error"> | |
193 <help> | |
194 Isotope correction for MS/MS events triggered on isotopic peaks. Should be set to 0 (disabled) for open search or 0/1/2 for correction of narrow window searches. Shifts the precursor mass window to multiples of this value multiplied by the mass of C13-C12. | |
195 </help> | |
196 <option value="0">0 - Disabled (for open search)</option> | |
197 <option value="0/1/2">0/1/2 - (Correction for narrow window searches)</option> | |
198 </param> | |
199 </section> | |
200 | |
201 <section name="digestion" expanded="false" title="In-silico Digestion Parameters"> | |
202 <param name="search_enzyme_name" type="text" value="Trypsin" optional="true" | |
203 label="Digestion Enzyme" help="Name of enzyme to be written to the pepXML file."/> | |
204 <param name="search_enzyme_cutafter" type="text" value="" optional="true" | |
205 label="Residues after which the enzyme cuts" help="search_enzyme_cutafter Residues after which the enzyme cuts Default: KR"/> | |
206 <param name="search_enzyme_butnotafter" type="text" value="" optional="true" | |
207 label="Residues that the enzyme will not cut before" help="search_enzyme_butnotafter - (misnomer: should really be called butnotbefore) Default: P"/> | |
208 <param name="num_enzyme_termini" type="select" label="Number of enzyme termini"> | |
209 <option value="0">0 - non-enzymatic</option> | |
210 <option value="1">1 - semi-enzymatic</option> | |
211 <option value="2">2 - fully-enzymatic</option> | |
212 </param> | |
213 | |
214 <param name="allowed_missed_cleavage" type="integer" value="2" min="0" max="5" optional="true" | |
215 label="Allowed number of missed cleavages"/> | |
216 <param name="digest_min_length" type="integer" value="7" min="1" max="30" optional="true" | |
217 label="Minimum length of peptides to be generated during in-silico digestion"/> | |
218 <param name="digest_max_length" type="integer" value="64" min="10" max="100" optional="true" | |
219 label="Maximum length of peptides to be generated during in-silico digestion"/> | |
220 <param name="digest_mass_range_min" type="float" value="500.0" min="0.0" optional="true" | |
221 label="Minimum Mass of peptides to be generated during in-silico digestion in Daltons" /> | |
222 <param name="digest_mass_range_max" type="float" value="5000.0" min="0.0" optional="true" | |
223 label="Maximum Mass of peptides to be generated during in-silico digestion in Daltons" /> | |
224 | |
225 </section> | |
226 | |
227 <section name="variable_modification" expanded="false" title="Variable Modifications"> | |
228 <param name="variable_mods" type="text" area="True" size="120x7" value="" optional="true" | |
229 label=""> | |
230 <help><![CDATA[ | |
231 Sets variable modifications. (variable_mod_01 to variable_mod_07). Space separated values with 1st value being the modification mass and the second being the residues (specified consecutively as a string) it modifies. | |
232 * is used to represent any amino acid [ is a modifier for protein N-terminal | |
233 ] is a modifier for protein C-terminal | |
234 n is a modifier for peptide N-terminal c is a modifier for peptide C-terminal | |
235 Syntax Examples: | |
236 15.9949 M (for oxidation on methionine) | |
237 79.96633 STY (for phosphorylation) | |
238 -17.0265 nQnC (for pyro-Glu or loss of ammonia at peptide N-terminal) | |
239 Example (M oxidation and N-terminal acetylation): | |
240 variable_mod_01 = 15.9949 M variable_mod_02 = 42.0106 [* | |
241 ]]></help> | |
242 <!-- regex working in python, but not webform --> | |
243 <validator type="regex">(?ms)^(([+-]?\d+[.]\d+\s\S+)(\s+[+-]?\d+[.]\d+\s\S+)*)?$</validator> | |
244 <sanitizer sanitize="False"/> | |
245 </param> | |
246 <param name="clip_nTerm_M" type="boolean" truevalue="1" falsevalue="0" checked="false" | |
247 label="Trim protein N-terminal methionine as a variable modification"/> | |
248 <param name="allow_multiple_variable_mods_on_residue" type="boolean" truevalue="1" falsevalue="0" checked="true" | |
249 label="Allow each amino acid to be modified by multiple variable modifications"/> | |
250 <param name="max_variable_mods_per_mod" type="integer" value="2" min="0" max="5" optional="true" | |
251 label="Maximum number of residues that can be occupied by each variable modification"/> | |
252 <param name="max_variable_mods_combinations" type="integer" value="5000" min="0" max="65534" optional="true" | |
253 label="Maximum allowed number of modified variably modified peptides from each peptide sequence" | |
254 help="If a greater number than the maximum is generated, only the unmodified peptide is considered."/> | |
255 </section> | |
256 <section name="static_modification" expanded="false" title="Static Modifications"> | |
257 <param name="add_Cterm_peptide" type="float" value="" optional="true" | |
258 label="Statically add mass in Da to C-terminal of peptide Default: 0.0" /> | |
259 <param name="add_Nterm_peptide" type="float" value="" optional="true" | |
260 label="Statically add mass in Da to N-terminal of peptide Default: 0.0" /> | |
261 <param name="add_Cterm_protein" type="float" value="" optional="true" | |
262 label="Statically add mass in Da to C-terminal of protein Default: 0.0" /> | |
263 <param name="add_Nterm_protein" type="float" value="" optional="true" | |
264 label="Statically add mass in Da to N-terminal of protein Default: 0.0" /> | |
265 <param name="add_A_alanine" type="float" value="" optional="true" | |
266 label="Statically add mass in Da to A (alanine) Default: 0.0" /> | |
267 <param name="add_R_arginine" type="float" value="" optional="true" | |
268 label="Statically add mass in Da to R (arginine) Default: 0.0" /> | |
269 <param name="add_N_asparagine" type="float" value="" optional="true" | |
270 label="Statically add mass in Da to N (asparagine) Default: 0.0" /> | |
271 <param name="add_D_aspartic_acid" type="float" value="" optional="true" | |
272 label="Statically add mass in Da to D (aspartic_acid) Default: 0.0" /> | |
273 <param name="add_C_cysteine" type="float" value="" optional="true" | |
274 label="Statically add mass in Da to C (cysteine) Default: 0.0" /> | |
275 <param name="add_E_glutamic_acid" type="float" value="" optional="true" | |
276 label="Statically add mass in Da to E (glutamic_acid) Default: 0.0" /> | |
277 <param name="add_Q_glutamine" type="float" value="" optional="true" | |
278 label="Statically add mass in Da to Q (glutamine) Default: 0.0" /> | |
279 <param name="add_G_glycine" type="float" value="" optional="true" | |
280 label="Statically add mass in Da to G (glycine) Default: 0.0" /> | |
281 <param name="add_H_histidine" type="float" value="" optional="true" | |
282 label="Statically add mass in Da to H (histidine) Default: 0.0" /> | |
283 <param name="add_I_isoleucine" type="float" value="" optional="true" | |
284 label="Statically add mass in Da to I (isoleucine) Default: 0.0" /> | |
285 <param name="add_L_leucine" type="float" value="" optional="true" | |
286 label="Statically add mass in Da to L (leucine) Default: 0.0" /> | |
287 <param name="add_K_lysine" type="float" value="" optional="true" | |
288 label="Statically add mass in Da to K (lysine) Default: 0.0" /> | |
289 <param name="add_M_methionine" type="float" value="" optional="true" | |
290 label="Statically add mass in Da to M (methionine) Default: 0.0" /> | |
291 <param name="add_F_phenylalanine" type="float" value="" optional="true" | |
292 label="Statically add mass in Da to F (phenylalanine) Default: 0.0" /> | |
293 <param name="add_P_proline" type="float" value="" optional="true" | |
294 label="Statically add mass in Da to P (proline) Default: 0.0" /> | |
295 <param name="add_S_serine" type="float" value="" optional="true" | |
296 label="Statically add mass in Da to S (serine) Default: 0.0" /> | |
297 <param name="add_T_threonine" type="float" value="" optional="true" | |
298 label="Statically add mass in Da to T (threonine) Default: 0.0" /> | |
299 <param name="add_W_tryptophan" type="float" value="" optional="true" | |
300 label="Statically add mass in Da to W (tryptophan) Default: 0.0" /> | |
301 <param name="add_Y_tyrosine" type="float" value="" optional="true" | |
302 label="Statically add mass in Da to Y (tyrosine) Default: 0.0" /> | |
303 <param name="add_V_valine" type="float" value="" optional="true" | |
304 label="Statically add mass in Da to V (valine) Default: 0.0" /> | |
305 </section> | |
306 <section name="spectrum_processing" expanded="false" title="Spectrum Processing"> | |
307 <param name="minimum_peaks" type="integer" value="10" min="0" optional="true" | |
308 label="Minimum number of peaks in experimental spectrum for matching" /> | |
309 <param name="use_topN_peaks" type="integer" value="50" min="0" optional="true" | |
310 label="Pre-process experimental spectrum to only use top N peaks" /> | |
311 <param name="minimum_ratio" type="float" value="" min="0.0" optional="true" | |
312 label="Filter peaks by minimum_ratio of base peak" | |
313 help="Filters out all peaks in experimental spectrum less intense than this multiple of the base peak intensity. Default: 0.0" /> | |
314 | |
315 <param name="max_fragment_charge" type="integer" value="2" min="1" max="4" optional="true" | |
316 label="Maximum charge state for theoretical fragments to match (1-4)" /> | |
317 | |
318 <conditional name="precursor"> | |
319 <param name="override_charge" type="select" label="Precursor Charge"> | |
320 <option value="0">Use precursor charge</option> | |
321 <option value="1">Ignore precursor charge and set range</option> | |
322 </param> | |
323 <when value="0"/> | |
324 <when value="1"> | |
325 <param name="precursor_charge_min" type="integer" value="1" min="0" max="6" | |
326 label="Minimum Potential Precursor Charge" /> | |
327 <param name="precursor_charge_max" type="integer" value="4" min="0" max="8" | |
328 label="Maximum Potential Precursor Charge" /> | |
329 </when> | |
330 </conditional> | |
331 <section name="clear" expanded="false" title="Clear mz range for iTRAQ/TMT experiments"> | |
332 <param name="clear_mz_range_min" type="float" value="0.0" min="0.0" optional="true" label="Minimum of m/z range to remove" /> | |
333 <param name="clear_mz_range_max" type="float" value="0.0" min="0.0" optional="true" label="Maximum of m/z range to remove" /> | |
334 </section> | |
335 </section> | |
336 <section name="open_search" expanded="false" title="Open Search"> | |
337 <param name="track_zero_topN" type="integer" value="" min="0" optional="true" | |
338 label="Track top N unmodified peptide results" > | |
339 <help> | |
340 Track top N unmodified peptide results separately from main results internally for boosting features. | |
341 Should be set to a number greater than output_report_topN if zero bin boosting is desired. | |
342 Default: 0 | |
343 </help> | |
344 </param> | |
345 <param name="zero_bin_accept_expect" type="float" value="" min="0.0" optional="true" | |
346 label="Ranks a zero-bin hit above all non-zero-bin hit if it has expectation less than this value." | |
347 help="Default: 0.0" /> | |
348 <param name="zero_bin_mult_expect" type="float" value="" min="0.0" optional="true" | |
349 label="Multiplies expect value of PSMs in the zero-bin during results ordering" | |
350 help="(set to less than 1 for boosting) Default: 1.0" /> | |
351 <param name="add_topN_complementary" type="integer" value="" min="0" optional="true" | |
352 label="Insert complementary ions corresponding to the top N most intense fragments in each experimental spectra." > | |
353 <help> | |
354 Useful for recovery of modified peptides near C-terminal in open search. | |
355 Should be set to 0 (disabled) otherwise. | |
356 Default: 0 | |
357 </help> | |
358 </param> | |
359 | |
360 </section> | |
361 <section name="modeling_output" expanded="false" title="Modeling and Output"> | |
362 <param name="min_fragments_modelling" type="integer" value="3" min="1" optional="true" | |
363 label="Minimum number of matched peaks in PSM for inclusion in statistical modeling Default:3" /> | |
364 <param name="min_matched_fragments" type="integer" value="4" min="1" optional="true" | |
365 label="Minimum number of matched peaks for PSM to be reported Default:3" | |
366 help="recommend a minimum of 4 for narrow window searching and 6 for open searches"/> | |
367 <param name="output_report_topN" type="integer" value="1" min="1" optional="true" | |
368 label="Reports top N PSMs per input spectrum Default:1" /> | |
369 <param name="output_max_expect" type="float" value="50.0" min="0.0" optional="true" | |
370 label="Suppresses reporting of PSM if top hit has expectation greater than this threshold" /> | |
371 </section> | |
372 <param name="output_format" type="select" label="output format"> | |
373 <option value="pepXML">pepXML</option> | |
374 <option value="tsv">Tabular</option> | |
375 </param> | |
376 </inputs> | |
377 | |
378 <outputs> | |
379 <data format="txt" name="output_params" label="${tool.name}.params"/> | |
380 <data format="txt" name="logfile" label="${tool.name} log"/> | |
381 <data format="tsv" name="output_tsv" label="${tool.name}.tsv ${on_string}" > | |
382 <filter>output_format == 'tsv'</filter> | |
383 </data> | |
384 <data format="pepxml" name="output_pepxml" label="${tool.name}.pep.xml ${on_string}" > | |
385 <filter>output_format == 'pepXML'</filter> | |
386 </data> | |
387 </outputs> | |
388 <tests> | |
389 <test> | |
390 </test> | |
391 </tests> | |
392 <help> | |
393 <![CDATA[ | |
394 ============= | |
395 **MSFragger** | |
396 ============= | |
397 | |
398 MSFragger is an ultrafast database search tool for peptide identifications in mass spectrometry-based proteomics. It differs from conventional search engines by computing similarity scores in a fragment-centric fashion using a theoretical fragment index of candidate peptides. The speed of MSFragger makes it particularly suitable for 'open' database searches, where the precursor mass tolerance is set to hundreds of Daltons, for the identification of modified peptides. MSFragger is implemented in the cross-platform Java programming language and is compatible with standard proteomics file formats such as MGF/mzXML/mzML/pepXML. | |
399 | |
400 | |
401 **Common Static Modifications** | |
402 | |
403 :: | |
404 | |
405 - methylation of K : 14.015650 | |
406 - oxidation of M : 15.994915 | |
407 - carboxymethyl C : 58.005479 | |
408 - carbamidomethyl C : 57.021464 | |
409 - deamidation of N and Q : 0.984016 | |
410 - propionamide C : 71.037114 | |
411 - phosphorylation of S : 79.966331 | |
412 - phosphorylation of T : 79.966331 | |
413 - phosphorylation of Y : 79.966331 | |
414 - M cleavage from protein n-term : -131.040485 | |
415 - acetylation of protein n-term : 42.010565 | |
416 - methylation of protein n-term : 14.015650 | |
417 - tri-methylation of protein n-term : 42.046950 | |
418 - beta methylthiolation of D : 45.987721 | |
419 - methylation of Q : 14.015650 | |
420 - tri-methylation of K : 42.046950 | |
421 - methylation of D : 14.015650 | |
422 - methylation of E : 14.015650 | |
423 - methylation of peptide c-term : 14.015650 | |
424 - tri-deuteromethylation of D : 17.034480 | |
425 - tri-deuteromethylation of E : 17.034480 | |
426 - tri-deuteromethylation of peptide c-term : 17.034480 | |
427 - n-formyl met addition : 159.035399 | |
428 - 2-amino-3-oxo-butanoic acid T : -2.015650 | |
429 - acetylation of K : 42.010565 | |
430 - amidation of peptide c-term : -0.984016 | |
431 - beta-methylthiolation of D (duplicate of 13) : 45.987721 | |
432 - carboxyamidomethylation of K : 57.021464 | |
433 - carboxyamidomethylation of H : 57.021464 | |
434 - carboxyamidomethylation of D : 57.021464 | |
435 - carboxyamidomethylation of E : 57.021464 | |
436 - carbamylation of K : 43.005814 | |
437 - carbamylation of n-term peptide : 43.005814 | |
438 - citrullination of R : 0.984016 | |
439 - oxidation of C to cysteic acid : 47.984744 | |
440 - di-iodination of Y : 251.793296 | |
441 - di-methylation of K : 28.031300 | |
442 - di-methylation of R : 28.031300 | |
443 - di-methylation of peptide n-term : 28.031300 | |
444 - oxidation of F to dihydroxyphenylalanine : 31.989829 | |
445 - gammathiopropionylation of K : 87.998285 | |
446 - gammathiopropionylation of peptide n-term : 87.998285 | |
447 - farnesylation of C : 204.187801 | |
448 - formylation of K : 27.994915 | |
449 - formylation of peptide n-term : 27.994915 | |
450 - oxidation of W to formylkynurenin : 31.989829 | |
451 - fluorophenylalanine : 17.990578 | |
452 - beta-carboxylation of D : 43.989829 | |
453 - gamma-carboxylation of E : 43.989829 | |
454 - geranyl-geranyl : 272.250401 | |
455 - glucuronylation of protein n-term : 176.032088 | |
456 - glutathione disulfide : 305.068156 | |
457 - ubiquitinylation residue : 114.042927 | |
458 - guanidination of K : 42.021798 | |
459 - oxidation of H to N : -23.015984 | |
460 - oxidation of H to D : -22.031969 | |
461 - homoserine : -29.992806 | |
462 - homoserine lactone : -48.003371 | |
463 - oxidation of W to hydroxykynurenin : 19.989829 | |
464 - hydroxylation of D : 15.994915 | |
465 - hydroxylation of K : 15.994915 | |
466 - hydroxylation of N : 15.994915 | |
467 - hydroxylation of P : 15.994915 | |
468 - hydroxylation of F : 15.994915 | |
469 - hydroxylation of Y : 15.994915 | |
470 - iodination of Y : 125.896648 | |
471 - oxidation of W to kynurenin : 3.994915 | |
472 - lipoyl K : 188.032956 | |
473 - methyl ester of peptide c-term (duplicate of 18) : 14.015650 | |
474 - methyl ester of D : 14.015650 | |
475 - methyl ester of E (duplicate of 17) : 14.015650 | |
476 - methyl ester of S : 14.015650 | |
477 - methyl ester of Y : 14.015650 | |
478 - methyl C : 14.015650 | |
479 - methyl H : 14.015650 | |
480 - methyl N : 14.015650 | |
481 - methylation of peptide n-term : 14.015650 | |
482 - methyl R : 14.015650 | |
483 - myristoleylation of G : 208.182715 | |
484 - myristoyl-4H of G : 206.167065 | |
485 - myristoylation of peptide n-term G : 210.198366 | |
486 - myristoylation of K : 210.198366 | |
487 - formylation of protein n-term : 27.994915 | |
488 - NEM C : 125.047679 | |
489 - NIPCAM : 99.068414 | |
490 - oxidation of W to nitro : 44.985078 | |
491 - oxidation of Y to nitro : 44.985078 | |
492 - O18 on peptide n-term : 2.004246 | |
493 - di-O18 on peptide n-term : 4.008490 | |
494 - oxidation of H : 15.994915 | |
495 - oxidation of W : 15.994915 | |
496 - phosphopantetheine S : 340.085794 | |
497 - palmitoylation of C : 238.229666 | |
498 - palmitoylation of K : 238.229666 | |
499 - palmitoylation of S : 238.229666 | |
500 - palmitoylation of T : 238.229666 | |
501 - phosphorylation of S with prompt loss : -18.010565 | |
502 - phosphorylation of T with prompt loss : -18.010565 | |
503 - phosphorylation with prompt loss on Y : -18.010565 | |
504 - phosphorylation with neutral loss on C : 79.966331 | |
505 - phosphorylation with neutral loss on D : 79.966331 | |
506 - phosphorylation with neutral loss on H : 79.966331 | |
507 - propionyl light K : 56.026215 | |
508 - propionyl light on peptide n-term : 56.026215 | |
509 - propionyl heavy K : 59.036279 | |
510 - propionyl heavy peptide n-term : 59.036279 | |
511 - pyridyl K : 119.037114 | |
512 - pyridyl peptide n-term : 119.037114 | |
513 - pyro-cmC : -17.026549 | |
514 - pyro-glu from n-term E : -18.010565 | |
515 - pyro-glu from n-term Q : -17.026549 | |
516 - oxidation of P to pyroglutamic acid : 13.979265 | |
517 - s-pyridylethylation of C : 105.057849 | |
518 - SeMet : 47.944449 | |
519 - sulfation of Y : 79.956815 | |
520 - sulphone of M : 31.989829 | |
521 - tri-iodination of Y : 377.689944 | |
522 - tri-methylation of R : 42.046950 | |
523 - n-acyl diglyceride cysteine : 788.725777 | |
524 - ICAT light : 227.126991 | |
525 - ICAT heavy : 236.157185 | |
526 - CAMthiopropanoyl K : 145.019749 | |
527 - phosphorylation with neutral loss on S : 79.966331 | |
528 - phosphorylation with neutral loss on T : 79.966331 | |
529 - phosphorylation of S with ETD loss : 79.966331 | |
530 - phosphorylation of T with ETD loss : 79.966331 | |
531 - heavy arginine-13C6 : 6.020129 | |
532 - heavy arginine-13C6-15N4 : 10.008269 | |
533 - heavy lysine-13C6 : 6.020129 | |
534 - PNGasF in O18 water : 2.988261 | |
535 - beta elimination of S : -18.010565 | |
536 - beta elimination of T : -18.010565 | |
537 - oxidation of C to sulfinic acid : 31.989829 | |
538 - arginine to ornithine : -42.021798 | |
539 - dehydro of S and T : -18.010565 | |
540 - carboxykynurenin of W : 47.984744 | |
541 - sumoylation of K : 484.228200 | |
542 - iTRAQ114 on nterm : 144.105918 | |
543 - iTRAQ114 on K : 144.105918 | |
544 - iTRAQ114 on Y : 144.105918 | |
545 - iTRAQ115 on nterm : 144.099599 | |
546 - iTRAQ115 on K : 144.099599 | |
547 - iTRAQ115 on Y : 144.099599 | |
548 - iTRAQ116 on nterm : 144.102063 | |
549 - iTRAQ116 on K : 144.102063 | |
550 - iTRAQ116 on Y : 144.102063 | |
551 - iTRAQ117 on nterm : 144.102063 | |
552 - iTRAQ117 on K : 144.102063 | |
553 - iTRAQ117 on Y : 144.102063 | |
554 - MMTS on C : 45.987721 | |
555 - heavy lysine - 2H4 : 4.025107 | |
556 - heavy lysine - 13C6 15N2 : 8.014199 | |
557 - Asparagine HexNAc : 203.079373 | |
558 - Asparagine dHexHexNAc : 349.137281 | |
559 - Serine HexNAc : 203.079373 | |
560 - Threonine HexNAc : 203.079373 | |
561 - palmitoleyl of S : 236.214016 | |
562 - palmitoleyl of C : 236.214016 | |
563 - palmitoleyl of T : 236.214016 | |
564 - CHD2-di-methylation of K : 32.056407 | |
565 - CHD2-di-methylation of peptide n-term : 32.056407 | |
566 - Maleimide-PEO2-Biotin of C : 525.225719 | |
567 - phosphorylation of H : 79.966331 | |
568 - oxidation of C : 15.994915 | |
569 - oxidation of Y (duplicate of 64) : 15.994915 | |
570 - Uniblue A on K : 484.039891 | |
571 - deamidation of N : 0.984016 | |
572 - trideuteration of L (SILAC) : 3.018830 | |
573 - TMT duplex on K : 225.155833 | |
574 - TMT duplex on n-term peptide : 225.155833 | |
575 - TMT 6-plex on K : 229.162932 | |
576 - TMT 6-plex on n-term peptide : 229.162932 | |
577 - iTRAQ8plex:13C(7)15N(1) on nterm : 304.205360 | |
578 - iTRAQ8plex:13C(7)15N(1) on K : 304.205360 | |
579 - iTRAQ8plex:13C(7)15N(1) on Y : 304.205360 | |
580 - iTRAQ8plex:13C(6)15N(2) on nterm : 304.199040 | |
581 - iTRAQ8plex:13C(6)15N(2) on K : 304.199040 | |
582 - iTRAQ8plex:13C(6)15N(2) on Y : 304.199040 | |
583 - selenocysteine : 47.944449 | |
584 - carboxymethylated selenocysteine : 105.949928 | |
585 | |
586 ]]> | |
587 </help> | |
588 </tool> |