comparison metaphlan.xml @ 5:abd29bdbfbc9 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 08ec37116aab4268fdb93f175b60e50a0bbfafb2
author iuc
date Mon, 27 Feb 2023 06:59:29 +0000
parents bd2b2a490df8
children 11136e6b78f2
comparison
equal deleted inserted replaced
4:aea2c541d79f 5:abd29bdbfbc9
105 #end if 105 #end if
106 106
107 #if $inputs.db.db_selector == "history" 107 #if $inputs.db.db_selector == "history"
108 mkdir 'ref_db' 108 mkdir 'ref_db'
109 && 109 &&
110 bowtie2-build '$inputs.db.bowtie2db' 'ref_db/custom_db' 110 bowtie2-build --large-index '$inputs.db.bowtie2db' 'ref_db/custom_db'
111 && 111 &&
112 python '$__tool_directory__/customizemetadata.py' 112 python '$__tool_directory__/customizemetadata.py'
113 transform_json_to_pkl 113 transform_json_to_pkl
114 --json '$inputs.db.mpa_pkl' 114 --json '$inputs.db.mpa_pkl'
115 --pkl 'ref_db/custom_db.pkl' 115 --pkl 'ref_db/custom_db.pkl'
121 '$file_path' 121 '$file_path'
122 --input_type '$ext' 122 --input_type '$ext'
123 --read_min_len $inputs.in.read_min_len 123 --read_min_len $inputs.in.read_min_len
124 --bt2_ps '$inputs.in.mapping.bt2_ps' 124 --bt2_ps '$inputs.in.mapping.bt2_ps'
125 --min_mapq_val $inputs.in.mapping.min_mapq_val 125 --min_mapq_val $inputs.in.mapping.min_mapq_val
126 #if $ext == "sam"
127 --nreads \$(cat '$file_path' | grep -c -v '^@')
128 #end if
126 #else 129 #else
127 '$inputs.in.in' 130 '$inputs.in.in'
128 --input_type '$inputs.in.selector' 131 --input_type '$inputs.in.selector'
132 #if $inputs.in.selector == "sam"
133 --nreads \$(cat '$inputs.in.in' | grep -c -v '^@')
134 #end if
129 #end if 135 #end if
130 #if $inputs.db.db_selector == "cached" 136 #if $inputs.db.db_selector == "cached"
131 --bowtie2db '$inputs.db.cached_db.fields.path' 137 --bowtie2db '$inputs.db.cached_db.fields.path'
132 --index '$inputs.db.cached_db.fields.dbkey' 138 --index '$inputs.db.cached_db.fields.dbkey'
133 #else 139 #else
255 <param name="db_selector" type="select" label="Database with clade-specific marker genes"> 261 <param name="db_selector" type="select" label="Database with clade-specific marker genes">
256 <option value="cached" selected="true">Locally cached</option> 262 <option value="cached" selected="true">Locally cached</option>
257 <option value="history">From history</option> 263 <option value="history">From history</option>
258 </param> 264 </param>
259 <when value="cached"> 265 <when value="cached">
260 <param name="cached_db" label="Cached database with clade-specific marker genes" type="select"> 266 <param name="cached_db" type="select" label="Cached database with clade-specific marker genes">
261 <options from_data_table="metaphlan_database"> 267 <options from_data_table="@IDX_DATA_TABLE@">
262 <validator message="No MetaPhlAn database is available" type="no_options" /> 268 <filter type="static_value" column="4" value="@IDX_VERSION@"/>
269 <validator message="No compatible MetaPhlAn database is available" type="no_options"/>
263 </options> 270 </options>
264 </param> 271 </param>
265 </when> 272 </when>
266 <when value="history"> 273 <when value="history">
267 <param argument="--bowtie2db" type="data" format="fasta" label="Database with clade-specific marker genes from history"/> 274 <param argument="--bowtie2db" type="data" format="fasta" label="Database with clade-specific marker genes from history"/>
268 <param argument="--mpa_pkl" type="data" format="json" label="Metadata associate to the database with clade-specific marker genes from history"/> 275 <param argument="--mpa_pkl" type="data" format="json" label="Metadata associated to the database with clade-specific marker genes from history"/>
269 </when> 276 </when>
270 </conditional> 277 </conditional>
271 </section> 278 </section>
272 <section name="analysis" title="Analysis" expanded="true"> 279 <section name="analysis" title="Analysis" expanded="true">
273 <conditional name="analysis_type"> 280 <conditional name="analysis_type">
332 <section name="out" title="Outputs" expanded="true"> 339 <section name="out" title="Outputs" expanded="true">
333 <param argument="--sample_id_key" type="text" value="SampleID" label="Sample ID key for this analysis"/> 340 <param argument="--sample_id_key" type="text" value="SampleID" label="Sample ID key for this analysis"/>
334 <param argument="--sample_id" type="text" value="Metaphlan_Analysis" label="Sample ID for this analysis"/> 341 <param argument="--sample_id" type="text" value="Metaphlan_Analysis" label="Sample ID for this analysis"/>
335 <param argument="--use_group_representative" type='boolean' checked="false" truevalue='--use_group_representative' falsevalue='' 342 <param argument="--use_group_representative" type='boolean' checked="false" truevalue='--use_group_representative' falsevalue=''
336 label="Use a species as representative for species groups?"/> 343 label="Use a species as representative for species groups?"/>
337 <param name="legacy_output" argument="--legacy-output" type='boolean' checked="false" truevalue='--legacy-output' falsevalue='' 344 <param argument="--legacy-output" type='boolean' checked="false" truevalue='--legacy-output' falsevalue=''
338 label="Old MetaPhlAn2 two columns output?"/> 345 label="Old MetaPhlAn2 two columns output?"/>
339 <param argument="--CAMI_format_output" type='boolean' checked="false" truevalue='--CAMI_format_output' falsevalue='' 346 <param argument="--CAMI_format_output" type='boolean' checked="false" truevalue='--CAMI_format_output' falsevalue=''
340 label="Report the profiling using the CAMI output format?"/> 347 label="Report the profiling using the CAMI output format?"/>
341 <param argument="--unknown_estimation" type='boolean' checked="false" truevalue='--unknown_estimation' falsevalue='' 348 <param argument="--unknown_estimation" type='boolean' checked="false" truevalue='--unknown_estimation' falsevalue=''
342 label="Scale relative abundances to the number of reads mapping to known clades in order to estimate unknowness?"/> 349 label="Scale relative abundances to the number of reads mapping to known clades in order to estimate unknowness?"/>
406 <param name="unknown_estimation" value="false"/> 413 <param name="unknown_estimation" value="false"/>
407 <param name="krona_output" value="true"/> 414 <param name="krona_output" value="true"/>
408 </section> 415 </section>
409 <output name="output_file" ftype="tabular"> 416 <output name="output_file" ftype="tabular">
410 <assert_contents> 417 <assert_contents>
411 <has_text text="UNKNOWN"/> 418 <has_text text="UNCLASSIFIED"/>
412 </assert_contents> 419 </assert_contents>
413 </output> 420 </output>
414 <output name="bowtie2out" ftype="tabular"> 421 <output name="bowtie2out" ftype="tabular">
415 <assert_contents> 422 <assert_contents>
416 <not_has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/> 423 <not_has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
427 <not_has_text text="k__Bacteria"/> 434 <not_has_text text="k__Bacteria"/>
428 <not_has_text text="p__Actinobacteria"/> 435 <not_has_text text="p__Actinobacteria"/>
429 </assert_contents> 436 </assert_contents>
430 </output> 437 </output>
431 <output_collection name="levels" type="list" > 438 <output_collection name="levels" type="list" >
439
432 <element name="all" ftype="tabular"> 440 <element name="all" ftype="tabular">
433 <assert_contents> 441 <assert_contents>
434 <has_text text="class"/> 442 <has_text text="class"/>
435 <has_n_columns n="17"/> 443 <has_n_columns n="17"/>
436 <has_n_lines n="1"/> 444 <has_n_lines n="1"/>
437 </assert_contents> 445 </assert_contents>
438 </element> 446 </element>
447 <element name="class" ftype="tabular">
448 <assert_contents>
449 <has_text text="class_id"/>
450 <not_has_text text="phylum_id"/>
451 <has_n_columns n="3"/>
452 <has_n_lines n="1"/>
453 </assert_contents>
454 </element>
455 <element name="family" ftype="tabular">
456 <assert_contents>
457 <has_text text="family_id"/>
458 <not_has_text text="order"/>
459 <has_n_columns n="3"/>
460 <has_n_lines n="1"/>
461 </assert_contents>
462 </element>
463 <element name="genus" ftype="tabular">
464 <assert_contents>
465 <has_text text="genus_id"/>
466 <not_has_text text="family"/>
467 <has_n_columns n="3"/>
468 <has_n_lines n="1"/>
469 </assert_contents>
470 </element>
439 <element name="kingdom" ftype="tabular"> 471 <element name="kingdom" ftype="tabular">
440 <assert_contents> 472 <assert_contents>
441 <has_text text="kingdom_id"/> 473 <has_text text="kingdom_id"/>
474 <has_n_columns n="3"/>
475 <has_n_lines n="1"/>
476 </assert_contents>
477 </element>
478 <element name="order" ftype="tabular">
479 <assert_contents>
480 <has_text text="order_id"/>
481 <not_has_text text="class_id"/>
442 <has_n_columns n="3"/> 482 <has_n_columns n="3"/>
443 <has_n_lines n="1"/> 483 <has_n_lines n="1"/>
444 </assert_contents> 484 </assert_contents>
445 </element> 485 </element>
446 <element name="phylum" ftype="tabular"> 486 <element name="phylum" ftype="tabular">
449 <not_has_text text="kingdom_id"/> 489 <not_has_text text="kingdom_id"/>
450 <has_n_columns n="3"/> 490 <has_n_columns n="3"/>
451 <has_n_lines n="1"/> 491 <has_n_lines n="1"/>
452 </assert_contents> 492 </assert_contents>
453 </element> 493 </element>
454 <element name="class" ftype="tabular">
455 <assert_contents>
456 <has_text text="class_id"/>
457 <not_has_text text="phylum_id"/>
458 <has_n_columns n="3"/>
459 <has_n_lines n="1"/>
460 </assert_contents>
461 </element>
462 <element name="order" ftype="tabular">
463 <assert_contents>
464 <has_text text="order_id"/>
465 <not_has_text text="class_id"/>
466 <has_n_columns n="3"/>
467 <has_n_lines n="1"/>
468 </assert_contents>
469 </element>
470 <element name="family" ftype="tabular">
471 <assert_contents>
472 <has_text text="family_id"/>
473 <not_has_text text="order"/>
474 <has_n_columns n="3"/>
475 <has_n_lines n="1"/>
476 </assert_contents>
477 </element>
478 <element name="genus" ftype="tabular">
479 <assert_contents>
480 <has_text text="genus_id"/>
481 <not_has_text text="family"/>
482 <has_n_columns n="3"/>
483 <has_n_lines n="1"/>
484 </assert_contents>
485 </element>
486 <element name="species" ftype="tabular"> 494 <element name="species" ftype="tabular">
487 <assert_contents> 495 <assert_contents>
488 <has_text text="species_id"/> 496 <has_text text="species_id"/>
489 <not_has_text text="genus"/> 497 <not_has_text text="genus"/>
490 <has_n_columns n="3"/> 498 <has_n_columns n="3"/>
501 </element> 509 </element>
502 </output_collection> 510 </output_collection>
503 <output name="krona_output_file" ftype="tabular"> 511 <output name="krona_output_file" ftype="tabular">
504 <assert_contents> 512 <assert_contents>
505 <not_has_text text="k__Bacteria"/> 513 <not_has_text text="k__Bacteria"/>
506 <has_n_lines n="0"/> 514 <has_n_lines n="1" delta="1"/>
515 <has_size value="1" delta="1"/>
507 </assert_contents> 516 </assert_contents>
508 </output> 517 </output>
509 </test> 518 </test>
510 <test expect_num_outputs="6"> 519 <test expect_num_outputs="6">
511 <section name="inputs"> 520 <section name="inputs">
579 <has_text text="Gammaproteobacteria"/> 588 <has_text text="Gammaproteobacteria"/>
580 <has_text text="Corynebacterium accolens"/> 589 <has_text text="Corynebacterium accolens"/>
581 <has_n_columns n="17"/> 590 <has_n_columns n="17"/>
582 </assert_contents> 591 </assert_contents>
583 </element> 592 </element>
593 <element name="class" ftype="tabular">
594 <assert_contents>
595 <has_text text="class_id"/>
596 <not_has_text text="phylum_id"/>
597 <has_text text="Actinobacteria"/>
598 <has_n_columns n="3"/>
599 </assert_contents>
600 </element>
601 <element name="family" ftype="tabular">
602 <assert_contents>
603 <has_text text="family_id"/>
604 <not_has_text text="order"/>
605 <has_text text="Propionibacteriaceae"/>
606 <has_n_columns n="3"/>
607 </assert_contents>
608 </element>
609 <element name="genus" ftype="tabular">
610 <assert_contents>
611 <has_text text="genus_id"/>
612 <not_has_text text="family"/>
613 <has_text text="Cutibacterium"/>
614 <has_n_columns n="3"/>
615 </assert_contents>
616 </element>
584 <element name="kingdom" ftype="tabular"> 617 <element name="kingdom" ftype="tabular">
585 <assert_contents> 618 <assert_contents>
586 <has_text text="kingdom_id"/> 619 <has_text text="kingdom_id"/>
587 <has_text text="Bacteria"/> 620 <has_text text="Bacteria"/>
621 <has_n_columns n="3"/>
622 </assert_contents>
623 </element>
624 <element name="order" ftype="tabular">
625 <assert_contents>
626 <has_text text="order_id"/>
627 <not_has_text text="class_id"/>
628 <has_text text="Propionibacteriales"/>
588 <has_n_columns n="3"/> 629 <has_n_columns n="3"/>
589 </assert_contents> 630 </assert_contents>
590 </element> 631 </element>
591 <element name="phylum" ftype="tabular"> 632 <element name="phylum" ftype="tabular">
592 <assert_contents> 633 <assert_contents>
593 <has_text text="phylum_id"/> 634 <has_text text="phylum_id"/>
594 <not_has_text text="kingdom_id"/> 635 <not_has_text text="kingdom_id"/>
595 <has_text text="Firmicutes"/> 636 <has_text text="Firmicutes"/>
596 <has_n_columns n="3"/>
597 </assert_contents>
598 </element>
599 <element name="class" ftype="tabular">
600 <assert_contents>
601 <has_text text="class_id"/>
602 <not_has_text text="phylum_id"/>
603 <has_text text="Actinobacteria"/>
604 <has_n_columns n="3"/>
605 </assert_contents>
606 </element>
607 <element name="order" ftype="tabular">
608 <assert_contents>
609 <has_text text="order_id"/>
610 <not_has_text text="class_id"/>
611 <has_text text="Propionibacteriales"/>
612 <has_n_columns n="3"/>
613 </assert_contents>
614 </element>
615 <element name="family" ftype="tabular">
616 <assert_contents>
617 <has_text text="family_id"/>
618 <not_has_text text="order"/>
619 <has_text text="Propionibacteriaceae"/>
620 <has_n_columns n="3"/>
621 </assert_contents>
622 </element>
623 <element name="genus" ftype="tabular">
624 <assert_contents>
625 <has_text text="genus_id"/>
626 <not_has_text text="family"/>
627 <has_text text="Cutibacterium"/>
628 <has_n_columns n="3"/> 637 <has_n_columns n="3"/>
629 </assert_contents> 638 </assert_contents>
630 </element> 639 </element>
631 <element name="species" ftype="tabular"> 640 <element name="species" ftype="tabular">
632 <assert_contents> 641 <assert_contents>
979 <has_text text="Gammaproteobacteria"/> 988 <has_text text="Gammaproteobacteria"/>
980 <has_text text="Corynebacterium accolens"/> 989 <has_text text="Corynebacterium accolens"/>
981 <has_n_columns n="9"/> 990 <has_n_columns n="9"/>
982 </assert_contents> 991 </assert_contents>
983 </element> 992 </element>
993 <element name="class" ftype="tabular">
994 <assert_contents>
995 <has_text text="class"/>
996 <has_text text="Actinobacteria"/>
997 <has_n_columns n="2"/>
998 </assert_contents>
999 </element>
1000 <element name="family" ftype="tabular">
1001 <assert_contents>
1002 <has_text text="family"/>
1003 <has_text text="Propionibacteriaceae"/>
1004 <has_n_columns n="2"/>
1005 </assert_contents>
1006 </element>
1007 <element name="genus" ftype="tabular">
1008 <assert_contents>
1009 <has_text text="genus"/>
1010 <has_text text="Cutibacterium"/>
1011 <has_n_columns n="2"/>
1012 </assert_contents>
1013 </element>
984 <element name="kingdom" ftype="tabular"> 1014 <element name="kingdom" ftype="tabular">
985 <assert_contents> 1015 <assert_contents>
986 <has_text text="kingdom"/> 1016 <has_text text="kingdom"/>
987 <has_text text="Bacteria"/> 1017 <has_text text="Bacteria"/>
988 <has_n_columns n="2"/> 1018 <has_n_columns n="2"/>
989 </assert_contents> 1019 </assert_contents>
990 </element> 1020 </element>
991 <element name="phylum" ftype="tabular">
992 <assert_contents>
993 <has_text text="phylum"/>
994 <has_text text="Firmicutes"/>
995 <has_n_columns n="2"/>
996 </assert_contents>
997 </element>
998 <element name="class" ftype="tabular">
999 <assert_contents>
1000 <has_text text="class"/>
1001 <has_text text="Actinobacteria"/>
1002 <has_n_columns n="2"/>
1003 </assert_contents>
1004 </element>
1005 <element name="order" ftype="tabular"> 1021 <element name="order" ftype="tabular">
1006 <assert_contents> 1022 <assert_contents>
1007 <has_text text="order"/> 1023 <has_text text="order"/>
1008 <has_text text="Propionibacteriales"/> 1024 <has_text text="Propionibacteriales"/>
1009 <has_n_columns n="2"/> 1025 <has_n_columns n="2"/>
1010 </assert_contents> 1026 </assert_contents>
1011 </element> 1027 </element>
1012 <element name="family" ftype="tabular"> 1028 <element name="phylum" ftype="tabular">
1013 <assert_contents> 1029 <assert_contents>
1014 <has_text text="family"/> 1030 <has_text text="phylum"/>
1015 <has_text text="Propionibacteriaceae"/> 1031 <has_text text="Firmicutes"/>
1016 <has_n_columns n="2"/>
1017 </assert_contents>
1018 </element>
1019 <element name="genus" ftype="tabular">
1020 <assert_contents>
1021 <has_text text="genus"/>
1022 <has_text text="Cutibacterium"/>
1023 <has_n_columns n="2"/> 1032 <has_n_columns n="2"/>
1024 </assert_contents> 1033 </assert_contents>
1025 </element> 1034 </element>
1026 <element name="species" ftype="tabular"> 1035 <element name="species" ftype="tabular">
1027 <assert_contents> 1036 <assert_contents>
1049 <help><![CDATA[ 1058 <help><![CDATA[
1050 What it does 1059 What it does
1051 ============ 1060 ============
1052 1061
1053 MetaPhlAn is a computational tool for profiling the composition of microbial communities (Bacteria, 1062 MetaPhlAn is a computational tool for profiling the composition of microbial communities (Bacteria,
1054 Archaea and Eukaryotes) from metagenomic shotgun sequencing data (i.e. not 16S) with species-level. 1063 Archaea and Eukaryotes) from metagenomic shotgun sequencing data (i.e. not 16S) at species-level.
1055 1064
1056 MetaPhlAn relies on ~1.1M unique clade-specific marker genes identified from ~100,000 reference genomes 1065 MetaPhlAn relies on ~1.1M unique clade-specific marker genes identified from ~100,000 reference genomes
1057 (~99,500 bacterial and archaeal and ~500 eukaryotic), allowing: 1066 (~99,500 bacterial and archaeal and ~500 eukaryotic), allowing:
1058 1067
1059 - unambiguous taxonomic assignments; 1068 - unambiguous taxonomic assignments;
1075 1084
1076 MetaPhlAn introduces the capability of characterizing organisms at the strain level using non 1085 MetaPhlAn introduces the capability of characterizing organisms at the strain level using non
1077 aggregated marker information. Such capability comes with several slightly different flavours and 1086 aggregated marker information. Such capability comes with several slightly different flavours and
1078 is a way to perform strain tracking and comparison across multiple samples. 1087 is a way to perform strain tracking and comparison across multiple samples.
1079 1088
1080 Usually, MetaPhlAn is first run with the default parameter for the type of analysis to profile the 1089 Usually, MetaPhlAn is first run with default parameters for the type of analysis to profile the
1081 species present in the community, and then a strain-level profiling can be performed to zoom-in into 1090 species present in the community, and then a strain-level profiling can be performed to zoom-in on
1082 specific species of interest. This operation can be performed quickly as it exploits the bowtie2out 1091 specific species of interest. This operation can be performed quickly as it exploits the bowtie2out
1083 intermediate file saved during the execution of the default analysis type. 1092 intermediate file saved during the execution of the default analysis type.
1084 1093
1085 Inputs 1094 Inputs
1086 ====== 1095 ======
1087 1096
1088 MetaPhlAn takes as input either: 1097 MetaPhlAn takes as input either:
1089 1098
1090 - one or several sequence files in Fasta, FastQ (compressed or not) 1099 - one or several sequence files in Fasta, FastQ (whether compressed or not)
1091 - a BowTie2-produced SAM file 1100 - a BowTie2-produced SAM file
1092 - an intermediary mapping file of the microbiota generated by a previous MetaPhlAn run 1101 - an intermediary mapping file of the microbiota generated by a previous MetaPhlAn run
1093 1102
1094 It also need the reference database, which can be locally installed or customized using the dedicated tools. 1103 It also needs the reference database, which can be locally installed or customized using the dedicated tools.
1095 1104
1096 Outputs 1105 Outputs
1097 ======= 1106 =======
1098 1107
1099 The main output file is a tab-separated file with the predicted taxon relative abundances. 1108 The main output is a tab-separated file with the predicted taxon relative abundances.
1100 1109
1101 It also generates a BIOM file and some intermediate files (SAM and BowTie2out) if sequence files are given as inputs. 1110 It also generates a BIOM file and some intermediate files (SAM and BowTie2out) if sequence files are given as inputs.
1102 1111
1103 1112
1104 More help and use cases 1113 More help and use cases
1105 ======================= 1114 =======================
1106 1115
1107 To get more information about MetaPhlAn usage and use cases, please refer to the `Metaphlan documentation`_. 1116 To get more information about MetaPhlAn usage and use cases, please refer to the `Metaphlan documentation`_.
1108 1117
1109 .. _Metaphlan documentation: https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-3.0#basic-usage 1118 .. _Metaphlan documentation: https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-4#Basic-Usage
1110 1119
1111 ]]></help> 1120 ]]></help>
1112 <expand macro="citations"/> 1121 <expand macro="citations"/>
1113 </tool> 1122 </tool>