# HG changeset patch # User azuzolo # Date 1339014997 14400 # Node ID d1cf2064ddd8bbe7e8ea0e482ee48060b12795a6 # Parent 3b1401bf4cb1c2052449d4d75661e10582cd314f Uploaded diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/README Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,129 @@ +This was a first attempt at providing galaxy tool_wrappers for the Qiime metagenomics package: +You must first istall Qiime: http://qiime.sourceforge.net/install/install.html + + + +Initial tool wrappers were generated by a script searching the qiime scripts (version 1.2.1) for usage info, +and then were hand edited afterwards. + +NOTE: A few of the tool configs worked on the galaxy-central code in April 2011. +I haven't taken time to check them with more recent galaxy releases. + + +I executed the qiime scripts via qiime_wrapper.py +This was to accommmodate moving multiple outputs to history items: http://wiki.g2.bx.psu.edu/Admin/Tools/Multiple%20Output%20Files + + +The datatypes file: metagenomics.py has Mothur datatypes with a start at qiime types added at the end. + + + + +The most common used qiime scripts are: +- check_id_map.py +- split_libraries.py +- pick_otus_through_otu_table.py +- beta_diversity_through_3d_plots.py +- alpha_rarefaction.py +- jackknifed_beta_diversity.py +- filter_by_metadata.py +- filter_otu_table.py +- merge_otu_tables.py +- merge_mapping_files.py + + +Tool_config development status: +The tool configs with a * indicate that the tool at least displayed in galaxy at least once upon time. +( Since these were intially auto generated, some may not make sense in a galaxy framework. 
) + + add_taxa.xml + adjust_seq_orientation.xml +* align_seqs.xml +* alpha_diversity.xml metrics - select input/output repeat conditional tree +* alpha_rarefaction.xml +* assign_taxonomy.xmlA assignment_method-select +* beta_diversity.xml +* beta_diversity_through_3d_plots.xml html-plots + beta_significance.xml + blast_wrapper.xml +* check_id_map.xml + collate_alpha.xml +* compare_3d_plots.xml + consensus_tree.xml + convert_otu_table_to_unifrac_sample_mapping.xml + convert_unifrac_sample_mapping_to_otu_table.xml +* denoise.xml +* dissimilarity_mtx_stats.xml + exclude_seqs_by_blast.xml + extract_seqs_by_sample_id.xml +* filter_alignment.xml + filter_by_metadata.xml + filter_fasta.xml + filter_otu_table.xml +* filter_otus_by_sample.xml + fix_arb_fasta.xml + identify_chimeric_seqs.xml +* jackknifed_beta_diversity.xml +* make_2d_plots.xml +* make_3d_plots.xml + make_bootstrapped_tree.xml + make_distance_histograms.xml + make_fastq.xml + make_library_id_lists.xml +* make_otu_heatmap_html.xml +* make_otu_network.xml + make_otu_table.xml + make_per_library_sff.xml + make_phylogeny.xml + make_pie_charts.xml + make_prefs_file.xml + make_qiime_py_file.xml +* make_qiime_rst_file.xml +* make_rarefaction_plots.xml +* make_sra_submission.xml +* merge_denoiser_output.xml + merge_mapping_files.xml + merge_otu_maps.xml + merge_otu_tables.xml + multiple_rarefactions.xml + multiple_rarefactions_even_depth.xml + otu_category_significance.xml +* parallel_align_seqs_pynast.xml + parallel_alpha_diversity.xml +* parallel_assign_taxonomy_blast.xml +* parallel_assign_taxonomy_rdp.xml + parallel_beta_diversity.xml +* parallel_blast.xml + parallel_identify_chimeric_seqs.xml + parallel_multiple_rarefactions.xml +* parallel_pick_otus_blast.xml +* parallel_pick_otus_uclust_ref.xml + per_library_stats.xml +* pick_otus.xml +* pick_otus_through_otu_table.xml + pick_rep_set.xml +* plot_rank_abundance_graph.xml + poller.xml + poller_example.xml + pool_by_metadata.xml + principal_coordinates.xml + 
print_qiime_config.xml +* process_sff.xml +* process_sra_submission.xml +* quality_scores_plot.xml + shared_phylotypes.xml + single_rarefaction.xml + sort_denoiser_output.xml +* split_libraries.xml +* split_libraries_illumina.xml + sra_spreadsheet_to_map_files.xml + start_parallel_jobs.xml + summarize_otu_by_cat.xml + summarize_taxa.xml +* supervised_learning.xml +* transform_coordinate_matrices.xml +* tree_compare.xml + trflp_file_to_otu_table.xml + trim_sff_primers.xml +* truncate_fasta_qual_files.xml + upgma_cluster.xml diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/align_seqs.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/align_seqs.xml Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,93 @@ + + Align sequences using a variety of alignment methods + + align_seqs.py + + + qiime_wrapper.py + --galaxy_outputdir='$log.extra_files_path' + --galaxy_datasets='^\S+_aligned\.\S+$:'$aligned_fasta,'^\S+_log\.txt$:'$log,'^\S+_failures\.fasta$:'$failures + align_seqs.py + --input_fasta_fp=$input_fasta_fp + --alignment_method=$alignment_method + #if $alignment_method.__str__ == 'pynast': + #if $alignment.template_fp != None and $alignment.template_fp.__str__ != 'None' and $alignment.template_fp.__str__ != '': + --template_fp=$alignment.template_fp + #end if + --pairwise_alignment_method=$pairwise_alignment_method + --min_length=$min_length + --min_percent_id=$min_percent_id + #if $blast_db != None and $blast_db.__str__ != 'None' and $blast_db.__str__ != '': + --blast_db=$blast_db + #end if + #elif $alignment_method.__str__ == 'infernal': + --template_fp=$alignment.template_fp + #end if + + --output_dir='$log.extra_files_path' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + .. class:: warningmark +Note: MUSCLE alignment is still not verified. Use at your own risk. + +For more information, see align_seqs_ in the Qiime documentation. 
+ +Updated and validated 01/16/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + +.. _align_seqs: http://qiime.org/scripts/align_seqs.html + + diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/alpha_diversity.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/alpha_diversity.xml Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,86 @@ + + Calculate alpha diversity on each sample in an otu table, using a variety of alpha diversity metrics + + alpha_diversity.py + + + qiime_wrapper.py + #if $run_type.input_type.__str__ == "multi": + --galaxy_logfile=$output_path + --galaxy_outputdir=$output_path.extra_files_path + #end if + alpha_diversity.py + #if $run_type.input_type.__str__ == "multi": + --input_path=$input_path.extra_files_path + --output_path=$output_path.extra_files_path + #else: + --output_path=$output_path + --input_path=$input_path + #end if + --metrics=$metrics + #if $metrics.__str__ == 'PD_whole_tree': + --tree_path=$tree_path + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This tool calculates alpha diversity, or within-sample diversity, using an otu table. Metrics may be selected in any combination. Input can be the log file from multiple_rarefactions (batch alpha diversity), or a single rarefied OTU table (single_rarefaction/single file alpha diversity). When the phylogenetic metric PD_whole_tree is selected, a .tre file must be supplied for the tool to run. The output file is a log file listing all the alpha rarefaction files produced. + +For more information, see alpha_diversity_ in the Qiime documentation. 
+ +Updated and validated 01/16/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + +.. _alpha_diversity: http://qiime.org/scripts/alpha_diversity.html + + diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/assign_taxonomy.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/assign_taxonomy.xml Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,53 @@ + + Assign taxonomy to each sequence + + assign_taxonomy.py + + + qiime_wrapper.py + --galaxy_outputdir='$outputfile.extra_files_path' + --galaxy_datasets='^\S+\.txt$:'$outputfile + assign_taxonomy.py + --input_fasta_fp=$input_fasta_fp + #if $id_to_taxonomy_fp != None and $id_to_taxonomy_fp.__str__ != 'None' and $id_to_taxonomy_fp.__str__ != '': + --id_to_taxonomy_fp=$id_to_taxonomy_fp + #end if + #if $reference_seqs_fp != None and $reference_seqs_fp.__str__ != 'None' and $reference_seqs_fp.__str__ != '': + --reference_seqs_fp=$reference_seqs_fp + #end if + #if $training_data_properties_fp != None and $training_data_properties_fp.__str__ != 'None' and $training_data_properties_fp.__str__ != '': + --training_data_properties_fp.$training_data_properties_fp + #end if + --confidence=$confidence + --assignment_method=rdp + --output_dir='$outputfile.extra_files_path' + + + + + + + + + + + + + + + Only uses RDP. For blast, use MBAC blast tools. + +For more information, see assign_taxonomy_ in the Qiime documentation. + +Updated and validated 01/16/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + +.. 
_assign_taxonomy: http://qiime.org/scripts/assign_taxonomy.html + + diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/beta_diversity.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/beta_diversity.xml Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,351 @@ + + Calculate beta diversity (pairwise sample dissimilarity) on one or many otu tables + + beta_diversity.py + + + qiime_wrapper.py + --galaxy_outputdir=$__new_file_path__ + #set datasets = [] + #set $path = "" + #if $binary_chisq.__str__ != "None": + #set datasets = $datasets + ["'binary_chisq_.*$:'" + $binary_chisq.__str__] + #if path == "": + #set $path=$binary_chisq.extra_files_path + #end if + #end if + #if $binary_chord.__str__ != "None": + #set datasets = $datasets + ["'binary_chord_.*$:'" + $binary_chord.__str__] + #if path == "": + #set $path=$binary_chord.extra_files_path + #end if + #end if + #if $binary_euclidean.__str__ != "None": + #set datasets = $datasets + ["'binary_euclidean_.*$:'" + $binary_euclidean.__str__] + #if path == "": + #set $path=$binary_euclidean.extra_files_path + #end if + #end if + #if $binary_hamming.__str__ != "None": + #set datasets = $datasets + ["'binary_hamming_.*$:'" + $binary_hamming.__str__] + #if path == "": + #set $path=$binary_hamming.extra_files_path + #end if + #end if + #if $binary_jaccard.__str__ != "None": + #set datasets = $datasets + ["'binary_jaccard_.*$:'" + $binary_jaccard.__str__] + #if path == "": + #set $path=$binary_jaccard.extra_files_path + #end if + #end if + #if $binary_lennon.__str__ != "None": + #set datasets = $datasets + ["'binary_lennon_.*$:'" + $binary_lennon.__str__] + #if path == "": + #set $path=$binary_lennon.extra_files_path + #end if + #end if + #if $binary_ochiai.__str__ != "None": + #set datasets = $datasets + ["'binary_ochiai_.*$:'" + $binary_ochiai.__str__] + #if path == "": + #set $path=$binary_ochiai.extra_files_path + #end if + #end if + #if $binary_pearson.__str__ != "None": + #set datasets = $datasets + ["'binary_pearson_.*$:'" + 
$binary_pearson.__str__] + #if path == "": + #set $path=$binary_pearson.extra_files_path + #end if + #end if + #if $binary_sorensen_dice.__str__ != "None": + #set datasets = $datasets + ["'binary_sorensen_dice_.*$:'" + $binary_sorensen_dice.__str__] + #if path == "": + #set $path=$binary_sorensen.extra_files_path + #end if + #end if + #if $bray_curtis.__str__ != "None": + #set datasets = $datasets + ["'bray_curtis_.*$:'" + $bray_curtis.__str__] + #if path == "": + #set $path=$bray_curtis.extra_files_path + #end if + #end if + #if $canberra.__str__ != "None": + #set datasets = $datasets + ["'canberra_.*$:'" + $canberra.__str__] + #if path == "": + #set $path=$canberra.extra_files_path + #end if + #end if + #if $chisq.__str__ != "None": + #set datasets = $datasets + ["'chisq_.*$:'" + $chisq.__str__] + #if path == "": + #set $path=$binary_euclidean.extra_files_path + #end if + #end if + #if $chord.__str__ != "None": + #set datasets = $datasets + ["'chord_.*$:'" + $chord.__str__] + #if path == "": + #set $path=$chord.extra_files_path + #end if + #end if + #if $euclidean.__str__ != "None": + #set datasets = $datasets + ["'euclidean_.*$:'" + $euclidean.__str__] + #if path == "": + #set $path=$euclidean.extra_files_path + #end if + #end if + #if $gower.__str__ != "None": + #set datasets = $datasets + ["'gower_.*$:'" + $gower.__str__] + #if path == "": + #set $path=$gower.extra_files_path + #end if + #end if + #if $hellinger.__str__ != "None": + #set datasets = $datasets + ["'hellinger_.*$:'" + $hellinger.__str__] + #if path == "": + #set $path=$hellinger.extra_files_path + #end if + #end if + #if $kulczynski.__str__ != "None": + #set datasets = $datasets + ["'kulczynski_.*$:'" + $kulczynski.__str__] + #if path == "": + #set $path=$kulczynski.extra_files_path + #end if + #end if + #if $manhattan.__str__ != "None": + #set datasets = $datasets + ["'manhattan_.*$:'" + $manhattan.__str__] + #if path == "": + #set $path=$manhattan.extra_files_path + #end if + #end if + #if 
$morisita_horn.__str__ != "None": + #set datasets = $datasets + ["'morisita_horn_.*$:'" + $morisita_horn.__str__] + #if path == "": + #set $path=$morisita_horn.extra_files_path + #end if + #end if + #if $pearson.__str__ != "None": + #set datasets = $datasets + ["'pearson_.*$:'" + $pearson.__str__] + #if path == "": + #set $path=$pearson.extra_files_path + #end if + #end if + #if $soergel.__str__ != "None": + #set datasets = $datasets + ["'soergel_.*$:'" + $soergel.__str__] + #if path == "": + #set $path=$soergel.extra_files_path + #end if + #end if + #if $spearman_approx.__str__ != "None": + #set datasets = $datasets + ["'spearman_approx_.*$:'" + $spearman_approx.__str__] + #if path == "": + #set $path=$spearman_approx.extra_files_path + #end if + #end if + #if $specprof.__str__ != "None": + #set datasets = $datasets + ["'specprof_.*$:'" + $specprof.__str__] + #if path == "": + #set $path=$specprof.extra_files_path + #end if + #end if + #if $unifrac.__str__ != "None": + #set datasets = $datasets + ["'unifrac_.*$:'" + $unifrac.__str__] + #if path == "": + #set $path=$unifrac.extra_files_path + #end if + #end if + #if $unifrac_g.__str__ != "None": + #set datasets = $datasets + ["'unifrac_g_.*$:'" + $unifrac_g.__str__] + #if path == "": + #set $path=$unifrac_g.extra_files_path + #end if + #end if + #if $unifrac_g_full_tree.__str__ != "None": + #set datasets = $datasets + ["'unifrac_g_full_tree_.*$:'" + $unifrac_g_full_tree.__str__] + #if path == "": + #set $path=$unifrac_g_full_tree.extra_files_path + #end if + #end if + #if $unweighted_unifrac.__str__ != "None": + #set datasets = $datasets + ["'unweighted_unifrac_.*$:'" + $unweighted_unifrac.__str__] + #if path == "": + #set $path=$unweighted_unifrac.extra_files_path + #end if + #end if + #if $unweighted_unifrac_full_tree.__str__ != "None": + #set datasets = $datasets + ["'unweighted_unifrac_full_tree_.*$:'" + $unweighted_unifrac_full_tree.__str__] + #if path == "": + #set 
$path=$unweighted_unifrac_full_tree.extra_files_path + #end if + #end if + #if $weighted_normalized_unifrac.__str__ != "None": + #set datasets = $datasets + ["'weighted_normalized_unifrac_.*$:'" + $weighted_normalized_unifrac.__str__] + #if path == "": + #set $path=$weighted_normalized_unifrac.extra_files_path + #end if + #end if + #if $weighted_unifrac.__str__ != "None": + #set datasets = $datasets + ["'weighted_unifrac_.*$:'" + $weighted_unifrac.__str__] + #if path == "": + #set $path=$weighted_unifrac.extra_files_path + #end if + #end if + --galaxy_datasets=#echo ','.join($datasets) + --galaxy_new_files_path='$path' + beta_diversity.py + --input_path=$input_path + #if $rows.__str__ != '': + --rows=$rows + #end if + --output_dir=$__new_file_path__ + --metrics=$metrics + #if $tree_path.__str__ != "None" and len($tree_path.__str__) > 0: + --tree_path=$tree_path + #end if + $full_tree + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 'binary_chisq' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'binary_chord' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'binary_euclidean' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'binary_hamming' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'binary_jaccard' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'binary_lennon' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'binary_ochiai' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'binary_pearson' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'binary_sorensen_dice' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'bray_curtis' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'canberra' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'chisq' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'chord' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'euclidean' 
in (metrics if isinstance(metrics,list) else [metrics]) + + + 'gower' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'hellinger' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'kulczynski' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'manhattan' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'morisita_horn' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'pearson' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'soergel' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'spearman_approx' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'specprof' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'unifrac' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'unifrac_g' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'unifrac_g_full_tree' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'unweighted_unifrac' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'unweighted_unifrac_full_tree' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'weighted_normalized_unifrac' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'weighted_unifrac' in (metrics if isinstance(metrics,list) else [metrics]) + + + + + For more information, see beta_diversity_ in the Qiime documentation. + +Updated and validated 01/18/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + + .. 
_beta_diversity: http://qiime.org/scripts/beta_diversity.html + + diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/check_id_map.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/check_id_map.xml Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,52 @@ + + Checks user's metadata mapping file for required data, valid format + + check_id_map.py + + + qiime_wrapper.py + --galaxy_outputdir='$log.extra_files_path' + --galaxy_datasets='^\S+_corrected\.txt$:'$corrected_mapping,'^\S+\.log:'$log + check_id_map.py + --map=$map + --output_dir='$log.extra_files_path' + --char_replace=$char_replace + $not_barcoded + $variable_len_barcodes + $disable_primer_check + $verbose + #if $added_demultiplex_field != None and $added_demultiplex_field.__str__ != "": + --added_demultiplex_field=$added_demultiplex_field + #end if + + + + + + + + + + + + + + + + + For more information, see check_id_map_ in the Qiime documentation. + +Updated and validated 01/19/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + + .. _check_id_map: http://qiime.org/scripts/check_id_map.html + + diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/collate_alpha.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/collate_alpha.xml Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,38 @@ + + Collate alpha diversity results + + collate_alpha.py + + + qiime_wrapper.py + --galaxy_outputdir=$output1.extra_files_path + --galaxy_new_files_path='$__new_file_path__' + --galaxy_logfile=$output1 + --galaxy_new_datasets='^\S+\.txt$:txt' + --galaxy_datasetid=$output1.id + collate_alpha.py + --input_path=$input_path.extra_files_path + --output_path='$__new_file_path__' + + + + + + + + + + + This tool concatenates all the files generated by alpha_diversity in order to generate rarefaction curves. 
The input is therefore the log file generated by alpha_diversity, and the output is a log file listing all the output files, as well as the files themselves. Galaxy must be manually refreshed after running this tool to view all output files. + +For more information, see collate_alpha_ in the Qiime documentation. + +Updated and validated 01/16/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + +.. _collate_alpha: http://qiime.org/scripts/collate_alpha.html + + diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/filter_alignment.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/filter_alignment.xml Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,51 @@ + + Filter sequence alignment by removing highly variable regions + + filter_alignment.py + + + qiime_wrapper.py + ## --galaxy_tmpdir='$__new_file_path__' + --galaxy_outputdir='$pfiltered_fasta.extra_files_path' + --galaxy_datasets='^\S+_pfiltered\.\S+$:'$pfiltered_fasta + filter_alignment.py + --input_fasta_file=$input_fasta_file + --output_dir='$pfiltered_fasta.extra_files_path' + --lane_mask_fp=$lane_mask_fp + $suppress_lane_mask_filter + --allowed_gap_frac=$allowed_gap_frac + $remove_outliers + --threshold=$threshold + #if $entropy_threshold != 0.0: + --entropy_threshold=$entropy_threshold + #end if + + + + + + + + + + + + + + + + For more information, see filter_alignment_ in the Qiime documentation. + +Updated and validated 01/16/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + +.. 
_filter_alignment: http://qiime.org/scripts/filter_alignment.html + + diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/identify_chimeric_seqs.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/identify_chimeric_seqs.xml Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,73 @@ + + Identify chimeric sequences in input FASTA file + + identify_chimeric_seqs.py + + + qiime_wrapper.py + identify_chimeric_seqs.py + --input_fasta_fp=$input_fasta_fp + #if $pick.chimera_detection_method == 'ChimeraSlayer': + --chimera_detection_method=$pick.chimera_detection_method + --aligned_reference_seqs_fp=$pick.aligned_reference_seqs_fp + #if $pick.min_div_ratio.__str__ != '0.0': + --min_div_ratio=$pick.min_div_ratio + #end if + #elif $pick.chimera_detection_method == 'blast_fragments': + --chimera_detection_method=$pick.chimera_detection_method + --id_to_taxonomy_fp=$pick.id_to_taxonomy_fp + #if $pick.blast_db != None and $pick.blast_db.__str__ != "": + --blast_db=$pick.blast_db + #else: + --reference_seqs_fp=$pick.reference_seqs_fp + #end if + --num_fragments=$pick.num_fragments + --taxonomy_depth=$pick.taxonomy_depth + --max_e_value=$pick.max_e_value + #end if + --output_fp=$output_fp + + + + + + + + + + + + + + + + + + + + + + + + + + + + For more information, see identify_chimeric_seqs_ in the Qiime documentation. + +Updated and validated 01/19/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + +.. 
_identify_chimeric_seqs: http://qiime.org/scripts/identify_chimeric_seqs.html + + diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/lib/._galaxy Binary file qiime/lib/._galaxy has changed diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/lib/galaxy/._datatypes Binary file qiime/lib/galaxy/._datatypes has changed diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/lib/galaxy/datatypes/metagenomics.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/lib/galaxy/datatypes/metagenomics.py Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,1121 @@ +""" +metagenomics datatypes +James E Johnson - University of Minnesota +for Mothur +""" + +import data +import logging, os, sys, time, tempfile, shutil, string, glob, re +import galaxy.model +from galaxy.datatypes import metadata +from galaxy.datatypes import tabular +from galaxy.datatypes import sequence +from galaxy.datatypes.metadata import MetadataElement +from galaxy.datatypes.tabular import Tabular +from galaxy.datatypes.sequence import Fasta +from galaxy import util +from galaxy.datatypes.images import Html +from sniff import * + +log = logging.getLogger(__name__) + + +## Mothur Classes + +class Otu( Tabular ): + file_ext = 'otu' + + def sniff( self, filename ): + """ + Determines whether the file is a otu (operational taxonomic unit) format + """ + try: + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + if line: + if line[0] != '@': + linePieces = line.split('\t') + if len(linePieces) < 2: + return False + try: + check = int(linePieces[1]) + if check + 2 != len(linePieces): + return False + except ValueError: + return False + count += 1 + if count == 5: + return True + fh.close() + if count < 5 and count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class OtuList( Otu ): + file_ext = 'list' + +class Sabund( Otu ): + file_ext = 'sabund' + + def sniff( self, filename ): + """ + Determines whether the file is a otu (operational taxonomic 
unit) format + labelcount[value(1..n)] + + """ + try: + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + if line: + if line[0] != '@': + linePieces = line.split('\t') + if len(linePieces) < 2: + return False + try: + check = int(linePieces[1]) + if check + 2 != len(linePieces): + return False + for i in range( 2, len(linePieces)): + ival = int(linePieces[i]) + except ValueError: + return False + count += 1 + if count >= 5: + return True + fh.close() + if count < 5 and count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class Rabund( Sabund ): + file_ext = 'rabund' + +class GroupAbund( Otu ): + file_ext = 'grpabund' + def init_meta( self, dataset, copy_from=None ): + Otu.init_meta( self, dataset, copy_from=copy_from ) + def set_meta( self, dataset, overwrite = True, skip=1, max_data_lines = 100000, **kwd ): + # See if file starts with header line + if dataset.has_data(): + try: + fh = open( dataset.file_name ) + line = fh.readline() + line = line.strip() + linePieces = line.split('\t') + if linePieces[0] == 'label' and linePieces[1] == 'Group': + skip=1 + else: + skip=0 + finally: + fh.close() + Otu.set_meta( self, dataset, overwrite, skip, max_data_lines, **kwd) + def sniff( self, filename, vals_are_int=False): + """ + Determines whether the file is a otu (operational taxonomic unit) Shared format + labelgroupcount[value(1..n)] + The first line is column headings as of Mothur v 1.20 + """ + log.info( "sniff GroupAbund vals_are_int %s" % vals_are_int) + try: + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + if line: + if line[0] != '@': + linePieces = line.split('\t') + if len(linePieces) < 3: + return False + if count > 0 or linePieces[0] != 'label': + try: + check = int(linePieces[2]) + if check + 3 != len(linePieces): + return False + for i in range( 3, len(linePieces)): + if 
vals_are_int: + ival = int(linePieces[i]) + else: + fval = float(linePieces[i]) + except ValueError: + return False + count += 1 + if count >= 5: + return True + fh.close() + if count < 5 and count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class SharedRabund( GroupAbund ): + file_ext = 'shared' + + + def sniff( self, filename ): + """ + Determines whether the file is a otu (operational taxonomic unit) Shared format + labelgroupcount[value(1..n)] + The first line is column headings as of Mothur v 1.20 + """ + # return GroupAbund.sniff(self,filename,True) + isme = GroupAbund.sniff(self,filename,True) + log.info( "is SharedRabund %s" % isme) + return isme + + +class RelAbund( GroupAbund ): + file_ext = 'relabund' + + def sniff( self, filename ): + """ + Determines whether the file is a otu (operational taxonomic unit) Relative Abundance format + labelgroupcount[value(1..n)] + The first line is column headings as of Mothur v 1.20 + """ + # return GroupAbund.sniff(self,filename,False) + isme = GroupAbund.sniff(self,filename,False) + log.info( "is RelAbund %s" % isme) + return isme + +class SecondaryStructureMap(Tabular): + file_ext = 'map' + def __init__(self, **kwd): + """Initialize secondary structure map datatype""" + Tabular.__init__( self, **kwd ) + self.column_names = ['Map'] + + def sniff( self, filename ): + """ + Determines whether the file is a secondary structure map format + A single column with an integer value which indicates the row that this row maps to. + check you make sure is structMap[10] = 380 then structMap[380] = 10. 
+ """ + try: + fh = open( filename ) + line_num = 0 + rowidxmap = {} + while True: + line = fh.readline() + line_num += 1 + line = line.strip() + if not line: + break #EOF + if line: + try: + pointer = int(line) + if pointer > 0: + if pointer > line_num: + rowidxmap[line_num] = pointer + elif pointer < line_num & rowidxmap[pointer] != line_num: + return False + except ValueError: + return False + fh.close() + if count < 5 and count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class SequenceAlignment( Fasta ): + file_ext = 'align' + def __init__(self, **kwd): + Fasta.__init__( self, **kwd ) + """Initialize AlignCheck datatype""" + + def sniff( self, filename ): + """ + Determines whether the file is in Mothur align fasta format + Each sequence line must be the same length + """ + + try: + fh = open( filename ) + len = -1 + while True: + line = fh.readline() + if not line: + break #EOF + line = line.strip() + if line: #first non-empty line + if line.startswith( '>' ): + #The next line.strip() must not be '', nor startwith '>' + line = fh.readline().strip() + if line == '' or line.startswith( '>' ): + break + if len < 0: + len = len(line) + elif len != len(line): + return False + else: + break #we found a non-empty line, but its not a fasta header + if len > 0: + return True + except: + pass + finally: + fh.close() + return False + +class AlignCheck( Tabular ): + file_ext = 'align.check' + def __init__(self, **kwd): + """Initialize AlignCheck datatype""" + Tabular.__init__( self, **kwd ) + self.column_names = ['name','pound','dash','plus','equal','loop','tilde','total'] + self.column_types = ['str','int','int','int','int','int','int','int'] + self.comment_lines = 1 + + def set_meta( self, dataset, overwrite = True, **kwd ): + # Tabular.set_meta( self, dataset, overwrite = overwrite, first_line_is_header = True, skip = 1 ) + data_lines = 0 + if dataset.has_data(): + dataset_fh = open( dataset.file_name ) + while True: + line = 
dataset_fh.readline() + if not line: break + data_lines += 1 + dataset_fh.close() + dataset.metadata.comment_lines = 1 + dataset.metadata.data_lines = data_lines - 1 if data_lines > 0 else 0 + dataset.metadata.column_names = self.column_names + dataset.metadata.column_types = self.column_types + +class AlignReport(Tabular): + """ +QueryName QueryLength TemplateName TemplateLength SearchMethod SearchScore AlignmentMethod QueryStart QueryEnd TemplateStart TemplateEnd PairwiseAlignmentLength GapsInQuery GapsInTemplate LongestInsert SimBtwnQuery&Template +AY457915 501 82283 1525 kmer 89.07 needleman 5 501 1 499 499 2 0 0 97.6 + """ + file_ext = 'align.report' + def __init__(self, **kwd): + """Initialize AlignCheck datatype""" + Tabular.__init__( self, **kwd ) + self.column_names = ['QueryName','QueryLength','TemplateName','TemplateLength','SearchMethod','SearchScore', + 'AlignmentMethod','QueryStart','QueryEnd','TemplateStart','TemplateEnd', + 'PairwiseAlignmentLength','GapsInQuery','GapsInTemplate','LongestInsert','SimBtwnQuery&Template' + ] + +class BellerophonChimera( Tabular ): + file_ext = 'bellerophon.chimera' + def __init__(self, **kwd): + """Initialize AlignCheck datatype""" + Tabular.__init__( self, **kwd ) + self.column_names = ['Name','Score','Left','Right'] + +class SecondaryStructureMatch(Tabular): + """ + name pound dash plus equal loop tilde total + 9_1_12 42 68 8 28 275 420 872 + 9_1_14 36 68 6 26 266 422 851 + 9_1_15 44 68 8 28 276 418 873 + 9_1_16 34 72 6 30 267 430 860 + 9_1_18 46 80 2 36 261 + """ + def __init__(self, **kwd): + """Initialize SecondaryStructureMatch datatype""" + Tabular.__init__( self, **kwd ) + self.column_names = ['name','pound','dash','plus','equal','loop','tilde','total'] + +class DistanceMatrix(data.Text): + file_ext = 'dist' + """Add metadata elements""" + MetadataElement( name="sequence_count", default=0, desc="Number of sequences", readonly=False, optional=True, no_value=0 ) + + +class 
class LowerTriangleDistanceMatrix(DistanceMatrix):
    """Mothur lower-triangle (phylip) distance matrix."""
    file_ext = 'lower.dist'

    def __init__(self, **kwd):
        """Initialize LowerTriangleDistanceMatrix datatype"""
        DistanceMatrix.__init__( self, **kwd )

    def sniff( self, filename ):
        """
        Determines whether the file is a lower-triangle distance matrix (phylip) format.
        The first line has the number of sequences in the matrix; row k has the
        sequence name followed by the k-1 distances to all preceding sequences:
        5
        U68589
        U68590  0.3371
        U68591  0.3609  0.3782
        U68592  0.4155  0.3197  0.4148
        U68593  0.2872  0.1690  0.3361  0.2842
        """
        # BUG FIX: the original body was a copy of the pairwise sniffer (3
        # tab-separated columns), which rejects genuine lower-triangle files.
        fh = None
        try:
            fh = open( filename )
            # header line: the sequence count
            int( fh.readline().strip() )
            row = 0
            while True:
                line = fh.readline()
                if not line:
                    break #EOF
                line = line.strip()
                if not line:
                    break
                row += 1
                fields = line.split()
                # row k holds a name plus k-1 distances
                if len(fields) != row:
                    return False
                for value in fields[1:]:
                    float(value)
                if row == 5:
                    return True
            if row > 0:
                return True
        except (ValueError, IndexError, IOError):
            pass
        finally:
            if fh:
                fh.close()
        return False

class SquareDistanceMatrix(DistanceMatrix,Tabular):
    """Mothur square (column-formatted) distance matrix."""
    file_ext = 'square.dist'
    # class-level default; real count comes from the file's header line
    sequence_count = -1

    def __init__(self, **kwd):
        """Initialize SquareDistanceMatrix datatype"""
        Tabular.__init__( self, **kwd )

    def init_meta( self, dataset, copy_from=None ):
        data.Text.init_meta( self, dataset, copy_from=copy_from )

    def set_meta( self, dataset, overwrite = True, skip = None, **kwd ):
        dataset.metadata.sequences = 0

    def sniff( self, filename ):
        """
        Determines whether the file is a square distance matrix format.
        The first line has the number of sequences in the matrix.
        The following lines have the sequence name in the first column plus a
        column for the distance to each sequence, in row order, e.g.:
        3
        U68589	0.0000	0.3371	0.3610
        U68590	0.3371	0.0000	0.3783
        """
        fh = None
        try:
            fh = open( filename )
            count = 0
            line = fh.readline()
            line = line.strip()
            sequence_count = int(line)
            # BUG FIX: the original computed `seq_cnt + 1` with `seq_cnt`
            # undefined, so every sniff attempt raised NameError.
            col_cnt = sequence_count + 1
            while True:
                line = fh.readline()
                if not line:
                    break #EOF
                line = line.strip()
                if line:
                    if line[0] != '@':
                        linePieces = line.split('\t')
                        if len(linePieces) != col_cnt:
                            return False
                        try:
                            for i in range(1, col_cnt):
                                float(linePieces[i])
                        except ValueError:
                            return False
                        count += 1
                        if count == 5:
                            return True
            if 0 < count < 5:
                return True
        except Exception:
            pass
        finally:
            if fh:
                fh.close()
        return False
+ """ + try: + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + if line: + if line[0] != '@': + linePieces = line.split('\t') + if len(linePieces) != 3: + return False + try: + check = float(linePieces[2]) + except ValueError: + return False + count += 1 + if count == 5: + return True + fh.close() + if count < 5 and count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class AlignCheck(Tabular): + file_ext = 'align.check' + def __init__(self, **kwd): + """Initialize secondary structure map datatype""" + Tabular.__init__( self, **kwd ) + self.column_names = ['name','pound','dash','plus','equal','loop','tilde','total'] + self.columns = 8 + +class Names(Tabular): + file_ext = 'names' + def __init__(self, **kwd): + """Name file shows the relationship between a representative sequence(col 1) and the sequences(comma-separated) it represents(col 2)""" + Tabular.__init__( self, **kwd ) + self.column_names = ['name','representatives'] + self.columns = 2 + +class Summary(Tabular): + file_ext = 'summary' + def __init__(self, **kwd): + """summarizes the quality of sequences in an unaligned or aligned fasta-formatted sequence file""" + Tabular.__init__( self, **kwd ) + self.column_names = ['seqname','start','end','nbases','ambigs','polymer'] + self.columns = 6 + +class Group(Tabular): + file_ext = 'groups' + def __init__(self, **kwd): + """Name file shows the relationship between a representative sequence(col 1) and the sequences it represents(col 2)""" + Tabular.__init__( self, **kwd ) + self.column_names = ['name','group'] + self.columns = 2 + +class Design(Tabular): + file_ext = 'design' + def __init__(self, **kwd): + """Name file shows the relationship between a group(col 1) and a grouping (col 2), providing a way to merge groups.""" + Tabular.__init__( self, **kwd ) + self.column_names = ['group','grouping'] + self.columns = 2 + +class AccNos(Tabular): + file_ext = 
class AccNos(Tabular):
    """A single-column list of sequence names."""
    file_ext = 'accnos'

    def __init__(self, **kwd):
        """Initialize AccNos datatype"""
        Tabular.__init__( self, **kwd )
        self.column_names = ['name']
        self.columns = 1

class Oligos( data.Text ):
    """Mothur oligos file: 'forward'/'reverse' primer lines and 'barcode' lines."""
    file_ext = 'oligos'

    def sniff( self, filename ):
        """
        Determines whether the file is a mothur oligos file:
        2 tab-separated fields starting with forward/reverse, or
        3 tab-separated fields starting with barcode.
        """
        fh = None
        try:
            fh = open( filename )
            count = 0
            while True:
                line = fh.readline()
                line = line.strip()
                if not line:
                    break #EOF (or blank line)
                if line[0] != '#':
                    linePieces = line.split('\t')
                    if len(linePieces) == 2 and re.match('forward|reverse',linePieces[0]):
                        count += 1
                        continue
                    elif len(linePieces) == 3 and re.match('barcode',linePieces[0]):
                        count += 1
                        continue
                    else:
                        return False
                if count > 20:
                    return True
            if count > 0:
                return True
        except Exception:
            pass
        finally:
            # guard: fh is unbound when open() fails
            if fh:
                fh.close()
        return False

class Frequency(Tabular):
    """Mothur frequency file for chimera analysis: position<TAB>frequency."""
    file_ext = 'freq'

    def __init__(self, **kwd):
        """Initialize Frequency datatype"""
        Tabular.__init__( self, **kwd )
        self.column_names = ['position','frequency']
        self.column_types = ['int','float']

    def sniff( self, filename ):
        """
        Determines whether the file is a frequency tabular format for chimera analysis:
        #1.14.0
        0	0.000
        1	0.000
        ...
        155	0.975
        """
        fh = None
        try:
            fh = open( filename )
            count = 0
            while True:
                line = fh.readline()
                line = line.strip()
                if not line:
                    break #EOF (or blank line)
                if line[0] != '#':
                    try:
                        linePieces = line.split('\t')
                        int(linePieces[0])
                        float(linePieces[1])
                        count += 1
                    except Exception:
                        return False
                if count > 20:
                    return True
            if count > 0:
                return True
        except Exception:
            pass
        finally:
            if fh:
                fh.close()
        return False
+ """ + try: + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + else: + if line[0] != '#': + try: + linePieces = line.split('\t') + i = int(linePieces[0]) + f = float(linePieces[1]) + f = float(linePieces[2]) + f = float(linePieces[3]) + f = float(linePieces[4]) + f = float(linePieces[5]) + f = float(linePieces[6]) + count += 1 + continue + except: + return False + if count > 10: + return True + if count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class FilteredQuantile(Quantile): + file_ext = 'filtered.quan' + def __init__(self, **kwd): + """Quantiles for chimera analysis""" + Quantile.__init__( self, **kwd ) + self.filtered = True + +class MaskedQuantile(Quantile): + file_ext = 'masked.quan' + def __init__(self, **kwd): + """Quantiles for chimera analysis""" + Quantile.__init__( self, **kwd ) + self.masked = True + self.filtered = False + +class FilteredMaskedQuantile(Quantile): + file_ext = 'filtered.masked.quan' + def __init__(self, **kwd): + """Quantiles for chimera analysis""" + Quantile.__init__( self, **kwd ) + self.masked = True + self.filtered = True + +class LaneMask(data.Text): + file_ext = 'filter' + + def sniff( self, filename ): + """ + Determines whether the file is a lane mask filter: 1 line consisting of zeros and ones. 
+ """ + try: + fh = open( filename ) + while True: + buff = fh.read(1000) + if not buff: + break #EOF + else: + if not re.match('^[01]+$',line): + return False + return True + except: + pass + finally: + close(fh) + return False + +class SequenceTaxonomy(Tabular): + file_ext = 'seq.taxonomy' + """ + A table with 2 columns: + - SequenceName + - Taxonomy (semicolon-separated taxonomy in descending order) + Example: + X56533.1 Eukaryota;Alveolata;Ciliophora;Intramacronucleata;Oligohymenophorea;Hymenostomatida;Tetrahymenina;Glaucomidae;Glaucoma; + X97975.1 Eukaryota;Parabasalidea;Trichomonada;Trichomonadida;unclassified_Trichomonadida; + AF052717.1 Eukaryota;Parabasalidea; + """ + def __init__(self, **kwd): + Tabular.__init__( self, **kwd ) + self.column_names = ['name','taxonomy'] + + def sniff( self, filename ): + """ + Determines whether the file is a SequenceTaxonomy + """ + try: + pat = '^([^ \t\n\r\f\v;]+([(]\d+[)])?[;])+$' + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + if not line: + break #EOF + line = line.strip() + if line: + fields = line.split('\t') + if len(fields) != 2: + return False + if not re.match(pat,fields[1]): + return False + count += 1 + if count > 10: + break + if count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class RDPSequenceTaxonomy(SequenceTaxonomy): + file_ext = 'rdp.taxonomy' + """ + A table with 2 columns: + - SequenceName + - Taxonomy (semicolon-separated taxonomy in descending order, RDP requires exactly 6 levels deep) + Example: + AB001518.1 Bacteria;Bacteroidetes;Sphingobacteria;Sphingobacteriales;unclassified_Sphingobacteriales; + AB001724.1 Bacteria;Cyanobacteria;Cyanobacteria;Family_II;GpIIa; + AB001774.1 Bacteria;Chlamydiae;Chlamydiae;Chlamydiales;Chlamydiaceae;Chlamydophila; + """ + def sniff( self, filename ): + """ + Determines whether the file is a SequenceTaxonomy + """ + try: + pat = '^([^ \t\n\r\f\v;]+([(]\d+[)])?[;]){6}$' + fh = open( filename ) + count = 0 
+ while True: + line = fh.readline() + if not line: + break #EOF + line = line.strip() + if line: + fields = line.split('\t') + if len(fields) != 2: + return False + if not re.match(pat,fields[1]): + return False + count += 1 + if count > 10: + break + if count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class ConsensusTaxonomy(Tabular): + file_ext = 'cons.taxonomy' + def __init__(self, **kwd): + """A list of names""" + Tabular.__init__( self, **kwd ) + self.column_names = ['OTU','count','taxonomy'] + +class TaxonomySummary(Tabular): + file_ext = 'tax.summary' + def __init__(self, **kwd): + """A Summary of taxon classification""" + Tabular.__init__( self, **kwd ) + self.column_names = ['taxlevel','rankID','taxon','daughterlevels','total'] + +class Phylip(data.Text): + file_ext = 'phy' + + def sniff( self, filename ): + """ + Determines whether the file is in Phylip format (Interleaved or Sequential) + The first line of the input file contains the number of species and the + number of characters, in free format, separated by blanks (not by + commas). The information for each species follows, starting with a + ten-character species name (which can include punctuation marks and blanks), + and continuing with the characters for that species. + http://evolution.genetics.washington.edu/phylip/doc/main.html#inputfiles + Interleaved Example: + 6 39 + Archaeopt CGATGCTTAC CGCCGATGCT + HesperorniCGTTACTCGT TGTCGTTACT + BaluchitheTAATGTTAAT TGTTAATGTT + B. 
virginiTAATGTTCGT TGTTAATGTT + BrontosaurCAAAACCCAT CATCAAAACC + B.subtilisGGCAGCCAAT CACGGCAGCC + + TACCGCCGAT GCTTACCGC + CGTTGTCGTT ACTCGTTGT + AATTGTTAAT GTTAATTGT + CGTTGTTAAT GTTCGTTGT + CATCATCAAA ACCCATCAT + AATCACGGCA GCCAATCAC + """ + try: + fh = open( filename ) + # counts line + line = fh.readline().strip() + linePieces = line.split() + count = int(linePieces[0]) + seq_len = int(linePieces[1]) + # data lines + """ + TODO check data lines + while True: + line = fh.readline() + # name is the first 10 characters + name = line[0:10] + seq = line[10:].strip() + # nucleic base or amino acid 1-char designators (spaces allowed) + bases = ''.join(seq.split()) + # float per base (each separated by space) + """ + return True + except: + pass + finally: + close(fh) + return False + + +class Axes(Tabular): + file_ext = 'axes' + + def __init__(self, **kwd): + """Initialize axes datatype""" + Tabular.__init__( self, **kwd ) + def sniff( self, filename ): + """ + Determines whether the file is an axes format + The first line may have column headings. + The following lines have the name in the first column plus float columns for each axis. 
+ ==> 98_sq_phylip_amazon.fn.unique.pca.axes <== + group axis1 axis2 + forest 0.000000 0.145743 + pasture 0.145743 0.000000 + + ==> 98_sq_phylip_amazon.nmds.axes <== + axis1 axis2 + U68589 0.262608 -0.077498 + U68590 0.027118 0.195197 + U68591 0.329854 0.014395 + """ + try: + fh = open( filename ) + count = 0 + line = fh.readline() + line = line.strip() + col_cnt = None + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + if line: + fields = line.split('\t') + if col_cnt == None: # ignore values in first line as they may be column headings + col_cnt = len(fields) + else: + if len(fields) != col_cnt : + return False + try: + for i in range(1, col_cnt): + check = float(fields[i]) + except ValueError: + return False + count += 1 + if count > 10: + return True + if count > 0: + return True + except: + pass + finally: + fh.close() + return False + +## Qiime Classes + +class QiimeMetadataMapping(Tabular): + MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] ) + file_ext = 'qiimemapping' + + def __init__(self, **kwd): + """ + http://qiime.sourceforge.net/documentation/file_formats.html#mapping-file-overview + Information about the samples necessary to perform the data analysis. 
+ # self.column_names = ['#SampleID','BarcodeSequence','LinkerPrimerSequence','Description'] + """ + Tabular.__init__( self, **kwd ) + + def sniff( self, filename ): + """ + Determines whether the file is a qiime mapping file + Just checking for an appropriate header line for now, could be improved + """ + try: + pat = '#SampleID(\t[a-zA-Z][a-zA-Z0-9_]*)*\tDescription' + fh = open( filename ) + while True: + line = dataset_fh.readline() + if re.match(pat,line): + return True + except: + pass + finally: + close(fh) + return False + + def set_column_names(self, dataset): + if dataset.has_data(): + dataset_fh = open( dataset.file_name ) + line = dataset_fh.readline() + if line.startswith('#SampleID'): + dataset.metadata.column_names = line.strip().split('\t'); + dataset_fh.close() + + def set_meta( self, dataset, overwrite = True, skip = None, max_data_lines = None, **kwd ): + Tabular.set_meta(self, dataset, overwrite, skip, max_data_lines) + self.set_column_names(dataset) + +class QiimeOTU(Tabular): + """ + Associates OTUs with sequence IDs + Example: + 0 FLP3FBN01C2MYD FLP3FBN01B2ALM + 1 FLP3FBN01DF6NE FLP3FBN01CKW1J FLP3FBN01CHVM4 + 2 FLP3FBN01AXQ2Z + """ + file_ext = 'qiimeotu' + +class QiimeOTUTable(Tabular): + """ + #Full OTU Counts + #OTU ID PC.354 PC.355 PC.356 Consensus Lineage + 0 0 1 0 Root;Bacteria;Firmicutes;"Clostridia";Clostridiales + 1 1 3 1 Root;Bacteria + 2 0 2 2 Root;Bacteria;Bacteroidetes + """ + MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] ) + file_ext = 'qiimeotutable' + def init_meta( self, dataset, copy_from=None ): + tabular.Tabular.init_meta( self, dataset, copy_from=copy_from ) + def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): + self.set_column_names(dataset) + def set_column_names(self, dataset): + if dataset.has_data(): + dataset_fh = open( dataset.file_name ) + line = dataset_fh.readline() + line = dataset_fh.readline() + if line.startswith('#OTU 
ID'): + dataset.metadata.column_names = line.strip().split('\t'); + dataset_fh.close() + dataset.metadata.comment_lines = 2 + +class QiimeDistanceMatrix(Tabular): + """ + PC.354 PC.355 PC.356 + PC.354 0.0 3.177 1.955 + PC.355 3.177 0.0 3.444 + PC.356 1.955 3.444 0.0 + """ + file_ext = 'qiimedistmat' + def init_meta( self, dataset, copy_from=None ): + tabular.Tabular.init_meta( self, dataset, copy_from=copy_from ) + def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): + self.set_column_names(dataset) + def set_column_names(self, dataset): + if dataset.has_data(): + dataset_fh = open( dataset.file_name ) + line = dataset_fh.readline() + # first line contains the names + dataset.metadata.column_names = line.strip().split('\t'); + dataset_fh.close() + dataset.metadata.comment_lines = 1 + +class QiimePCA(Tabular): + """ + Principal Coordinate Analysis Data + The principal coordinate (PC) axes (columns) for each sample (rows). + Pairs of PCs can then be graphed to view the relationships between samples. + The bottom of the output file contains the eigenvalues and % variation explained for each PC. + Example: + pc vector number 1 2 3 + PC.354 -0.309063936588 0.0398252112257 0.0744672231759 + PC.355 -0.106593922619 0.141125998277 0.0780204374172 + PC.356 -0.219869362955 0.00917241121781 0.0357281314115 + + + eigvals 0.480220500471 0.163567082874 0.125594470811 + % variation explained 51.6955484555 17.6079322939 + """ + file_ext = 'qiimepca' + +class QiimeParams(Tabular): + """ +###pick_otus_through_otu_table.py parameters### + +# OTU picker parameters +pick_otus:otu_picking_method uclust +pick_otus:clustering_algorithm furthest + +# Representative set picker parameters +pick_rep_set:rep_set_picking_method first +pick_rep_set:sort_by otu + """ + file_ext = 'qiimeparams' + +class QiimePrefs(data.Text): + """ + A text file, containing coloring preferences to be used by make_distance_histograms.py, make_2d_plots.py and make_3d_plots.py. 
+ Example: +{ +'background_color':'black', + +'sample_coloring': + { + 'Treatment': + { + 'column':'Treatment', + 'colors':(('red',(0,100,100)),('blue',(240,100,100))) + }, + 'DOB': + { + 'column':'DOB', + 'colors':(('red',(0,100,100)),('blue',(240,100,100))) + } + }, +'MONTE_CARLO_GROUP_DISTANCES': + { + 'Treatment': 10, + 'DOB': 10 + } +} + """ + file_ext = 'qiimeprefs' + +class QiimeTaxaSummary(Tabular): + """ + Taxon PC.354 PC.355 PC.356 + Root;Bacteria;Actinobacteria 0.0 0.177 0.955 + Root;Bacteria;Firmicutes 0.177 0.0 0.444 + Root;Bacteria;Proteobacteria 0.955 0.444 0.0 + """ + MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] ) + file_ext = 'qiimetaxsummary' + + def set_column_names(self, dataset): + if dataset.has_data(): + dataset_fh = open( dataset.file_name ) + line = dataset_fh.readline() + if line.startswith('Taxon'): + dataset.metadata.column_names = line.strip().split('\t'); + dataset_fh.close() + + def set_meta( self, dataset, overwrite = True, skip = None, max_data_lines = None, **kwd ): + Tabular.set_meta(self, dataset, overwrite, skip, max_data_lines) + self.set_column_names(dataset) + +if __name__ == '__main__': + import doctest, sys + doctest.testmod(sys.modules[__name__]) + diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/make_2d_plots.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/make_2d_plots.xml Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,66 @@ + + Make 2D PCoA Plots + + make_2d_plots.py + + + qiime_wrapper.py + --galaxy_outputdir=$plot.extra_files_path + ##--galaxy_tmpdir='$__new_file_path__' + ##--galaxy_datasets='^\S+_2D_PCoA_plots\.html$:'$plot + --galaxy_datasets='^\S+\.html$:'$plot + ##--galaxy_datasetid=$output1.id + ##--galaxy_new_files_path='$__new_file_path__' + ##--galaxy_tmpdir='$__new_file_path__' + make_2d_plots.py + --coord_fname=$coord_fname + --map_fname=$map_fname + #if $colorby != None and $colorby.__str__ != 'None': + --colorby=$colorby + #end if + #if 
$prefs_path != None and $prefs_path.__str__ != 'None' and len($prefs_path.__str__) > 0: + --prefs_path=$prefs_path + #end if + --background_color=$background_color + --ellipsoid_opacity=$ellipsoid_opacity + --ellipsoid_method=$ellipsoid_method + #if $master_pcoa != None and $master_pcoa.__str__ != 'None' and len($master_pcoa.__str__) > 0: + --master_pcoa=$master_pcoa + #end if + --output_dir=$plot.extra_files_path + + + + + + + + + + + + + + + + + + + + For more information, see make_2d_plots_ in the Qiime documentation. + +Updated and validated 01/18/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + + .. _make_2d_plots: http://qiime.org/scripts/make_2d_plots.html + + diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/make_distance_histograms.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/make_distance_histograms.xml Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,56 @@ + + Make distance histograms + + make_distance_histograms.py + + + qiime_wrapper.py + --galaxy_outputdir=$plot.extra_files_path + --galaxy_datasets='^\S+\.html$:'$plot + make_distance_histograms.py + --distance_matrix_file=$distance_matrix_file + --map_fname=$map_fname + #if $prefs_path != None and $prefs_path.__str__ != 'None' and len($prefs_path.__str__) > 0: + --prefs_path=$prefs_path + #end if + --dir_path=$plot.extra_files_path + --background_color=$background_color + $monte_carlo + #if $fields != None and $fields.__str__ != ' ' and $fields.__str__ !='': + --fields=$fields + #end if + --monte_carlo_iters=$monte_carlo_iters + + + + + + + + + + + + + + + + + + + For more information, see make_distance_histograms_ in the Qiime documentation. 
+ +Updated and validated 01/18/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + + .. _make_distance_histograms: http://qiime.org/scripts/make_distance_histograms.html + + diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/make_otu_heatmap_html.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/make_otu_heatmap_html.xml Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,60 @@ + + Make heatmap of OTU table + + make_otu_heatmap_html.py + + + qiime_wrapper.py + --galaxy_outputdir='$otu_heatmp.extra_files_path' + --galaxy_datasets='^\S+\.html$:'$otu_heatmp + make_otu_heatmap_html.py + --otu_table_fp=$otu_table_fp + --output_dir='$otu_heatmp.extra_files_path' + --num_otu_hits=$num_otu_hits + #if $tree != None and $tree.__str__ != 'None': + --tree=$tree + #end if + #if $map_fname != None and $map_fname.__str__ != 'None' > 0: + --map_fname=$map_fname + #end if + #if $sample_tree != None and $sample_tree.__str__ != 'None': + --sample_tree=$sample_tree + #end if + $log_transform + --log_eps=$log_eps + + + + + + + + + + + + + + + + + For more information, see make_otu_heatmap_html_ in the Qiime documentation. + +Updated and validated 02/10/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + +.. 
_make_otu_heatmap_html: http://qiime.org/scripts/make_otu_heatmap_html.html + + diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/make_otu_table.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/make_otu_table.xml Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,39 @@ + + Make OTU table + + make_otu_table.py + + + qiime_wrapper.py + make_otu_table.py + --otu_map_fp=$otu_map_fp + --output_fp=$output_fp + #if $taxonomy.__str__ != 'None': + --taxonomy=$taxonomy + #end if + #if $exclude_otus_fp.__str__ != 'None': + --exclude_otus_fp=$exclude_otus_fp + #end if + + + + + + + + + + + + For more information, see make_otu_table_ in the Qiime documentation. + +Updated and validated 01/16/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + +.. _make_otu_table: http://qiime.org/scripts/make_otu_table + + diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/make_phylogeny.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/make_phylogeny.xml Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,49 @@ + + Make Phylogeny + + make_phylogeny.py + + + qiime_wrapper.py + --galaxy_inputdir='$__new_file_path__' + --galaxy_ext_change='$input_fp' + --galaxy_new_ext='fasta' + make_phylogeny.py + --input_fp='$__new_file_path__'/temporary.fasta + --tree_method=$tree_method + --result_fp=$result_fp + --log_fp=$log_fp + --root_method=$root_method + + + + + + + + + + + + + + + + + + + + + + For more information, see make_phylogeny_ in the Qiime documentation. + +Updated and validated 01/16/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + +.. 
_make_phylogeny: http://qiime.org/scripts/make_phylogeny.html + + diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/make_prefs_file.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/make_prefs_file.xml Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,54 @@ + + Generate preferences file + + make_prefs_file.py + + + qiime_wrapper.py + make_prefs_file.py + --map_fname=$map_fname + --output_fp=$output_fp + #if $mapping_headers_to_use != None and $mapping_headers_to_use.__str__ != '': + --mapping_headers_to_use=$mapping_headers_to_use + #end if + --background_color=$background_color + --monte_carlo_dists=$monte_carlo_dists + #if $input_taxa_file != None and $input_taxa_file.__str__ != '' and $input_taxa_file.__str__ != 'None': + --input_taxa_file=$input_taxa_file + #end if + --ball_scale=$ball_scale + --arrow_line_color=$arrow_line_color + --arrow_head_color=$arrow_head_color + + + + + + + + + + + + + + + + + For more information, see make_prefs_file_ in the Qiime documentation. + +Updated and validated 01/18/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + + .. 
_make_prefs_file: http://qiime.org/scripts/make_prefs_file.html + + diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/make_rarefaction_plots.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/make_rarefaction_plots.xml Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,62 @@ + + Generate Rarefaction Plots + + make_rarefaction_plots.py + + + qiime_wrapper.py + --galaxy_datasets='^\S+\.html$:'$plot + --galaxy_outputdir=$plot.extra_files_path + make_rarefaction_plots.py + --input_dir=$input_dir.extra_files_path + --map_fname=$map_fname + #if $colorby != None and $colorby.__str__ != 'None' and len($colorby.__str__) > 0: + --colorby=$colorby + #end if + #if $prefs_path != None and $prefs_path.__str__ != 'None': + --prefs_path=$prefs_path + #end if + #if $ymax != None and $ymax.__str__ != '': + --ymax=$ymax + #end if + --background_color=$background_color + --imagetype=$imagetype + --resolution=$resolution + --output_dir=$plot.extra_files_path + + + + + + + + + + + + + + + + + + + + + + + + This tool takes the log file output from collate_alpha to create an html file of rarefaction plots wherein you can plot by sample and/or by category. + +For more information, see make_rarefaction_plots_ in the Qiime documentation. + +Updated and validated 01/16/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + +.. 
_make_rarefaction_plots: http://qiime.org/scripts/make_rarefaction_plots.html + + diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/multiple_rarefactions.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/multiple_rarefactions.xml Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,51 @@ + + Perform multiple subsamplings/rarefactions on an otu table + + multiple_rarefactions.py + + + qiime_wrapper.py + --galaxy_logfile=$output1 + --galaxy_outputdir=$output1.extra_files_path + multiple_rarefactions.py + --input_path=$input_path + --output_path=$output1.extra_files_path + --min=$min + --max=$max + --step=$step + --num-reps=$num_reps + $lineages_included + $keep_empty_otus + + + + + + + + + + + + + + + + This tool rarefies OTU tables for use in jackknife, bootstrap, and rarefaction analyses. Samples with fewer sequences than the rarefaction depth requested for a given output otu table are omitted from those otu tables. The input is an OTU table (e.g., the output from make_otu_table). The output file is a log file listing all the rarefied otu tables produced. + +For more information, see multiple_rarefactions_ in the Qiime documentation. + +Updated and validated 01/16/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + +.. _multiple_rarefactions: http://qiime.org/scripts/multiple_rarefactions.html + + diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/per_library_stats.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/per_library_stats.xml Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,35 @@ + + Calculate per library statistics + + per_library_stats.py + + + qiime_wrapper.py + per_library_stats.py + --otu_table_fp=$otu_table_fp + #if $mapfile != None and $mapfile.__str__ != 'None' and $mapfile.__str__ != '': + --mapfile=$mapfile + #end if + --outputfile=$outputfile + + + + + + + + + + + .. 
class:: warningmark Warning: log data from standard output currently not available. + +For more information, see per_library_stats_ in the Qiime documentation. + +Updated and validated 01/18/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + + .. _per_library_stats: http://qiime.org/scripts/per_library_stats.html + + diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/pick_otus.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/pick_otus.xml Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,171 @@ + + OTU picking + + pick_otus.py + + + qiime_wrapper.py + --galaxy_outputdir='$log.extra_files_path' + #if $pick.otu_picking_method == 'uclust' and $pick.refseqs_fp.__str__ != 'None': + --galaxy_datasets='^\S+_otus\.txt$:'$otus,'^\S+_otus\.log$:'$log,'^\S+_failures\.txt$:'$failures + #else: + --galaxy_datasets='^\S+_otus\.txt$:'$otus,'^\S+_otus\.log$:'$log + #end if + pick_otus.py + --input_seqs_filepath=$input_seqs_filepath + #if $pick.otu_picking_method.__str__ == 'uclust': + #if $pick.refseqs_fp.__str__ != 'None': + --refseqs_fp=$pick.refseqs_fp + --otu_picking_method='uclust_ref' + $pick.suppress_new_clusters + #else: + --otu_picking_method=$pick.otu_picking_method + #end if + --similarity=$pick.similarity + $pick.enable_rev_strand_match + $pick.optimal_uclust + $pick.exact_uclust + $pick.user_sort + $pick.suppress_presort_by_abundance_uclust + --max_accepts=$pick.max_accepts + --max_rejects=$pick.max_rejects + #if $pick.uclust_otu_id_prefix != None and $pick.uclust_otu_id_prefix.__str__ != 'None' and $pick.uclust_otu_id_prefix.__str__ != '': + --uclust_otu_id_prefix=$pick.uclust_otu_id_prefix + #end if + $pick.uclust_stable_sort + $pick.save_uc_files + #elif $pick.otu_picking_method.__str__ == 'mothur': + --otu_picking_method=$pick.otu_picking_method + --clustering_algorithm=$pick.clustering_algorithm + 
--similarity=$pick.similarity + #elif $pick.otu_picking_method.__str__ == 'trie': + --otu_picking_method=$pick.otu_picking_method + $pick.trie_reverse_seqs + #elif $pick.otu_picking_method.__str__ == 'prefix_suffix': + --otu_picking_method=$pick.otu_picking_method + --prefix_length=$pick.prefix_length + --suffix_length=$pick.suffix_length + #elif pick.otu_picking_method.__str__ == 'blast': + --otu_picking_method=$pick.otu_picking_method + #if $refseqs_fp.__str__ != 'None': + --refseqs_fp=$pick.refseqs_fp + #end if + #if $pick.blast_db != None and $pick.blast_db.__str__ != 'None' and $pick.blast_db.__str__ != '': + --blast_db=$pick.blast_db + #end if + --similarity=$pick.similarity + --max_e_value=$pick.max_e_value + --min_aligned_percent=$pick.min_aligned_percent + #elif $pick.otu_picking_method == 'cdhit': + --otu_picking_method=$pick.otu_picking_method + --similarity=$pick.similarity + --max_cdhit_memory=$pick.max_cdhit_memory + #if $pick.prefix_prefilter_length != 0: + --prefix_prefilter_length=$pick.prefix_prefilter_length + #end if + $pick.trie_prefilter + #end if + --output_dir='$log.extra_files_path' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (pick['otu_picking_method'] == 'uclust' and pick['refseqs_fp']) + + + + + For more information, see pick_otus_ in the Qiime documentation. + +Updated and validated 01/16/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + +.. 
_pick_otus: http://qiime.org/scripts/pick_otus.html + + diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/pick_rep_set.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/pick_rep_set.xml Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,53 @@ + + Pick representative set of sequences + + pick_rep_set.py + + + qiime_wrapper.py + pick_rep_set.py + --input_file=$input_file + #if $reference_seqs_fp.__str__ != 'None' or $reference_seqs_fp != None and $reference_seqs_fp.__str__ == '': + --reference_seqs_fp=$reference_seqs_fp + #else: + --fasta_file=$fasta_file + #end if + --rep_set_picking_method=$rep_set_picking_method + --sort_by=$sort_by + --log_fp=$log_fp + --result_fp=$result_fp + + + + + + + + + + + + + + + + + + + + + + + For more information, see pick_rep_set_ in the Qiime documentation. + +Updated and validated 01/16/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + +.. 
_pick_rep_set: http://qiime.org/scripts/pick_rep_set.html + + diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/plot_taxa_summary.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/plot_taxa_summary.xml Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,99 @@ + + Make taxaonomy summary charts based on taxonomy assignment + + plot_taxa_summary.py + + + qiime_wrapper.py + --galaxy_outputdir='$taxonomy_summary_chart.extra_files_path' + --galaxy_datasets='^\S+_charts\.html$:'$taxonomy_summary_chart + plot_taxa_summary.py + #set $counts = [] + #for i in $inputs: + #set $counts = $counts + [$i.counts_fname.__str__] + #end for + --counts_fname=#echo ','.join($counts) + #if $labels != None and $labels.__str__ != 'None' and $labels.__str__ != '': + --labels=$labels + #end if + --num_categories=$num_categories + #if $colorby != None and $colorby.__str__ != 'None' and $colorby.__str__ != '': + --colorby=$colorby + #end if + #if $prefs_path != None and $prefs_path.__str__ != 'None': + --prefs_path=$prefs_path + #end if + --background_color=$background_color + --dpi=$dpi + --x_width=$x_width + --y_height=$y_height + --bar_width=$bar_width + --type_of_file=$type_of_file + --chart_type=$chart_type + --resize_nth_label=$resize_nth_label + $include_html_legend + $include_html_counts + --label_type=$label_type + --dir_path='$taxonomy_summary_chart.extra_files_path' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For more information, see plot_taxa_summary_ in the Qiime documentation. + +Updated and validated 01/20/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + + .. 
_plot_taxa_summary: http://qiime.org/scripts/plot_taxa_summary.html + + diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/principal_coordinates.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/principal_coordinates.xml Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,40 @@ + + Principal Coordinates Analysis (PCoA) + + principal_coordinates.py + + + qiime_wrapper.py + principal_coordinates.py + --input_path=$input_path + --output_path=$output_path + + + + + + + + + + + + + + + + + + + + + For more information, see principle_coordinates_ in the Qiime documentation. + +Updated and validated 01/18/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + + .. _principle_coordinates: http://qiime.org/scripts/principal_coordinates.html + + diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/qiime_wrapper.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/qiime_wrapper.py Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,312 @@ +#!/usr/bin/env python +import logging, os, string, sys, tempfile, glob, shutil, types, urllib, optparse, re +import shlex, subprocess + +""" +sys.argv +this --galaxy_datasets= --quime_script + +alpha_rarefaction + output html + wf_arare/alpha_rarefaction_plots/rarefaction_plots.html + wf_arare/alpha_rarefaction_plots/html_plots/ + wf_arare/alpha_div + wf_arare/alpha_div/alpha_rarefaction_101_0.txt + + --galaxy_summary_html=$output_html + --galaxy_summary_template=$output_template + --galaxy_summary_links='label:link,label:link' + --galaxy_outputdir=$output_html.extra_files_path + + +""" + +def stop_err( msg ): + sys.stderr.write( "%s\n" % msg ) + sys.exit() + +def __main__(): + debug = False + tmp_dir = None + inputdir = None + outputdir = None + dataset_patterns = None + datasetid = None + new_dataset_patterns = None + new_files_path = None + summary_html=None + summary_template=None + summary_links=None + ## 
adds "log file" printing capabilities for primary output in dynamic file output + logfile = None + ## added support for correcting file extensions + newext = None + extchange = None + ## check if there are files to generate + cmd_args = [] + for arg in sys.argv[1:]: + if arg.startswith('--galaxy_'): + (opt,val) = arg.split('=') if arg.find('=') > 0 else (arg,None) + ''' + if opt == '--galaxy_tmpdir': + try: + if not os.path.exists(val): + os.makedirs(val) + tmp_dir = val + except Exception, ex: + stop_err(ex) + ''' + if opt == '--galaxy_outputdir': + try: + if not os.path.exists(val): + os.makedirs(val) + outputdir = val + except Exception, ex: + stop_err(ex) + if opt == '--galaxy_datasets': + dataset_patterns = val.split(',') + if opt == '--galaxy_datasetid': + datasetid = val + if opt == '--galaxy_new_datasets': + new_dataset_patterns = val.split(',') + if opt == '--galaxy_new_files_path': + if not os.path.exists(val): + os.makedirs(val) + new_files_path = val + if opt == '--galaxy_summary_html': + summary_html=val + if opt == '--galaxy_summary_template': + summary_template=val + if opt == '--galaxy_summary_links': + summary_links=val + if opt == '--galaxy_debug': + debug = True + if opt == '--galaxy_logfile': + logfile = val + if opt == '--galaxy_ext_change': + extchange = val + if opt == '--galaxy_new_ext': + newext = val + if opt == '--galaxy_inputdir': + inputdir = val + else: + cmd_args.append(arg) + if debug: print >> sys.stdout, '\n : '.join(cmd_args) + try: + stderr = '' + # allow for changing of file extension for files which require it + if extchange != None and inputdir != None and newext != None: + #newfile = os.path.join(inputdir,"temporary."+newext) + try: + os.link(extchange,inputdir+"/temporary."+newext) + except: + shutil.copy2(extchange,inputdir+"/temporary."+newext) + cmdline = ' '.join(cmd_args) + if debug: print >> sys.stdout, cmdline + ''' + if tmp_dir == None or not os.path.isdir(tmp_dir): + tmp_dir = tempfile.mkdtemp() + if outputdir == 
None or not os.path.isdir(outputdir): + outputdir = tmp_dir + ''' + tmp_stderr_name = tempfile.NamedTemporaryFile( dir=tmp_dir,suffix='.err' ).name + tmp_stderr = open( tmp_stderr_name, 'wb' ) + tmp_stdout_name = tempfile.NamedTemporaryFile( dir=tmp_dir,suffix='.out' ).name + tmp_stdout = open( tmp_stdout_name, 'wb' ) + proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno(), stdout=tmp_stdout.fileno() ) + returncode = proc.wait() + tmp_stderr.close() + # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp_stderr_name, 'rb' ) + buffsize = 1048576 + try: + while True: + stderr += tmp_stderr.read( buffsize ) + if not stderr or len( stderr ) % buffsize != 0: + break + if debug: print >> sys.stderr, stderr + except OverflowError: + pass + tmp_stderr.close() + if returncode != 0: + if debug: print >> sys.stderr, "returncode = %d" % returncode + raise Exception, stderr + #raise Exception, sys.stderr + # collect results + if dataset_patterns != None: + for root, dirs, files in os.walk(outputdir): + for fname in files: + fpath = os.path.join(root,fname) + if dataset_patterns != None: + for output in dataset_patterns: + (pattern,path) = output.split(':') + if debug: print >> sys.stdout, '%s -> %s' % (pattern,path) + if path == None or path == 'None': + continue + if debug: print >> sys.stdout, 'outdir %s match: %s' % (fname,re.match(pattern,fname)) + if re.match(pattern,fname): + found = True + # flist.remove(fname) + try: + shutil.copy2(fpath, path) + if new_files_path != None: + os.link(fpath, os.path.join(new_files_path,fname)) + except Exception, ex: + stop_err('%s' % ex) + # move result to outdir + # Need to flatten the dir hierachy in order for galaxy to serve the href links + if summary_html != None: + """ + for root, dirs, files in os.walk(outputdir): + if root != outputdir: + for fname in files: + fpath = os.path.join(root,fname) + """ + ## move everything up one level + dlist = os.listdir(outputdir) + for 
dname in dlist: + dpath = os.path.join(outputdir,dname) + if os.path.isdir(dpath): + flist = os.listdir(dpath) + for fname in flist: + fpath = os.path.join(dpath,fname) + shutil.move(fpath,outputdir) + if summary_template != None: + shutil.copy(summary_template,summary_html) + """ + flist = os.listdir(outputdir) + if debug: print >> sys.stdout, 'outputdir: %s' % outputdir + if debug: print >> sys.stdout, 'files: %s' % ','.join(flist) + if dataset_patterns != None: + for output in dataset_patterns: + (pattern,path) = output.split(':') + if debug: print >> sys.stdout, '%s -> %s' % (pattern,path) + if path == None or path == 'None': + continue + for fname in flist: + if debug: print >> sys.stdout, 'outdir %s match: %s' % (fname,re.match(pattern,fname)) + if re.match(pattern,fname): + found = True + flist.remove(fname) + fpath = os.path.join(outputdir,fname) + try: + shutil.copy2(fpath, path) + except Exception, ex: + stop_err('%s' % ex) + """ + # Handle the dynamically generated galaxy datasets + # http://bitbucket.org/galaxy/galaxy-central/wiki/ToolsMultipleOutput + # --new_datasets = specifies files to be found in the new_file_path + # The list items are separated by commas + # Each item conatins: a regex pattern for matching filenames and a galaxy datatype (separated by :) + # The regex match.groups()[0] is used as the id name of the dataset, and must result in unique name for each output + # The --galaxy_output flag is used for instances where data needs to be copied to the extra_files_path for later + # directory use + if new_dataset_patterns != None and new_files_path != None and datasetid != None: + for output in new_dataset_patterns: + if ':' in output: pattern,ext = output.split(':',1) + flist = os.listdir(new_files_path) + for fname in flist: + m = re.match(pattern,fname) + if m: + fpath = os.path.join(new_files_path,fname) + if len(m.groups()) > 0: + root = m.groups()[0] + else: + # remove the ext from the name if it exists, galaxy will add back later + # 
remove underscores since galaxy uses that as a field separator for dynamic datasets + root = re.sub('\.?'+ext+'$','',fname).replace('_','').replace('.','') + # filename pattern required by galaxy + fn = "%s_%s_%s_%s_%s" % ( 'primary', datasetid, root, 'visible', ext ) + if debug: print >> sys.stdout, '> %s' % fpath + if debug: print >> sys.stdout, '< %s' % os.path.join(new_files_path,fn) + try: + os.link(fpath, os.path.join(new_files_path,fn)) + # needed for files with variable output and a directory structure + if outputdir != None: + os.link(fpath, os.path.join(outputdir,fname)) + # clean out files from tmp directory, may be unnecessary + #os.remove(fpath) + except: + shutil.copy2(fpath, os.path.join(new_files_path,fn)) + # needed for files with variable output and a directory structure + if outputdir != None: + os.link(fpath, os.path.join(outputdir,fname)) + + print "bob" + logfile + ''' + if logfile != None: + print "bleep" + if outputdir != None: + print "beep" + logwrite = open(logfile, 'w+') + logwrite.write('Tool started. Files created by tool: \n') + flist = os.listdir(outputdir) + for fname in flist: + if 'DS_Store' not in fname: + logwrite.write(fname+'\n') + logwrite.write('Tool Finished.') + logwrite.close() + if new_files_path != None: + print "boop" + logwrite = open(logfile, 'w+') + if len(logfile.readline() > 0): + logwrite.write('Tool started. Files created by tool: \n') + flist = os.listdir(new_files_path) + for fname in flist: + if 'DS_Store' not in fname: + logwrite.write(fname+'\n') + logwrite.write('Tool Finished.') + logwrite.close() + ''' + except Exception, e: + msg = str(e) + stderr + #msg = str(e) + str(sys.stderr) + #stop_err( 'Error running ' + msg) + finally: + # Only remove temporary directories and files from temporary directory + # Enclose in try block, so we don't report error on stale nfs handles + try: + if logfile != None: + if outputdir != None: + logwrite = open(logfile, 'r+') + logwrite.write('Tool started. 
Files created by tool: \n') + flist = os.listdir(outputdir) + for fname in flist: + if 'DS_Store' not in fname and 'primary' not in fname: + logwrite.write(fname+'\n') + logwrite.write('Tool Finished.') + logwrite.close() + if new_files_path != None: + logwrite = open(logfile, 'r+') + logwrite.write('Tool started. Files created by tool: \n') + flist = os.listdir(new_files_path) + for fname in flist: + if 'DS_Store' not in fname and 'primary' not in fname: + logwrite.write(fname+'\n') + logwrite.write('Tool Finished.') + logwrite.close() + if tmp_dir != None and os.path.exists(tmp_dir) and os.path.isfile(tmp_dir): + #shutil.rmtree(tmp_dir) + pass + if outputdir != None and 'files' not in outputdir: + flist = os.listdir(outputdir) + for fname in flist: + if 'DS_Store' not in fname and 'primary' not in fname: + os.remove(os.path.join(outputdir,fname)) + if inputdir != None and 'files' not in inputdir: + flist = os.listdir(inputdir) + for fname in flist: + if 'DS_Store' not in fname and 'primary' not in fname: + os.remove(os.path.join(inputdir,fname)) + if new_files_path != None and 'files' not in new_files_path: + flist = os.listdir(new_files_path) + for fname in flist: + if 'DS_Store' not in fname and 'primary' not in fname: + os.remove(os.path.join(new_files_path,fname)) + + except: + pass + +if __name__ == "__main__": __main__() + diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/single_rarefaction.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/single_rarefaction.xml Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,36 @@ + + Perform rarefaction on an otu table + + single_rarefaction.py + + + qiime_wrapper.py + single_rarefaction.py + --input_path=$input_path + --output_path=$output_path + --depth=$depth + $suppress_lineages_included + $keep_empty_otus + + + + + + + + + + + + + For more information, see single_rarefaction_ in the Qiime documentation. 
+ +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + + .. _single_rarefaction: http://qiime.org/scripts/single_rarefaction.html + + diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/split_libraries.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/split_libraries.xml Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,148 @@ + + Split libraries according to barcodes specified in mapping file + + split_libraries.py + + + qiime_wrapper.py + --galaxy_datasets='^seqs\.fna$:'$sequences,'histograms\.txt:'$histograms,'split_library_log\.txt:'$log + --galaxy_outputdir='$log.extra_files_path' + split_libraries.py + --dir-prefix='$log.extra_files_path' + --map=$map + #set fnas = [] + #for i in $inputs: + #set fnas = $fnas + [$i.fasta.__str__] + #end for + --fasta=#echo ','.join($fnas) + #set quals = [] + #for i in $inputs: + #if $i.qual != None and $i.qual.__str__ != 'None': + #set quals = $quals + [$i.qual.__str__] + #end if + #end for + #if len($quals) > 0: + --qual=#echo ','.join($quals) + #end if + #if len($min_seq_length.__str__) > 0 and $min_seq_length > 0: + --min-seq-length=$min_seq_length + #end if + #if len($max_seq_length.__str__) > 0: + --max-seq-length=$max_seq_length + #end if + $trim_seq_length + #if len($min_qual_score.__str__) > 0: + --min-qual-score=$min_qual_score + #end if + $keep_primer + $keep_barcode + #if len($max_ambig.__str__) > 0: + --max-ambig=$max_ambig + #end if + #if len($max_homopolymer.__str__) > 0: + --max-homopolymer=$max_homopolymer + #end if + #if len($max_primer_mismatch.__str__) > 0: + --max-primer-mismatch=$max_primer_mismatch + #end if + --barcode-type=$barcode_type + #if $max_barcode_errors >= 0.: + --max-barcode-errors=$max_barcode_errors + #end if + #if len($start_numbering_at.__str__) > 0: + --start-numbering-at=$start_numbering_at + #end if + $retain_unassigned_reads + $disable_bc_correction + #if len($qual_score_window.__str__) > 0: + 
--qual_score_window=$qual_score_window + #end if + $disable_primers + --reverse_primers=$reverse_primers + #if $reverse_primer_mismatches != None and $reverse_primer_mismatches.__str__ != "" and $reverse_primers.__str__!='disable': + --reverse_primer_mismatches=$reverse_primer_mismatches + #end if + $record_qual_scores + $discard_bad_windows + #if $median_length_filtering != None and $median_length_filtering.__str__ != "": + --median_length_filtering=$median_length_filtering + #end if + #if $added_demultiplex_field != None and $added_demultiplex_field.__str__ != "": + --added_demultiplex_field=$added_demultiplex_field + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For more information, see split_libraries_ in the Qiime documentation. + +Updated and validated 01/19/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + + .. _split_libraries: http://qiime.org/scripts/split_libraries.html + + + diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/summarize_otu_by_cat.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/summarize_otu_by_cat.xml Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,38 @@ + + Create a summarized OTU table for a specific metadata category + + summarize_otu_by_cat.py + + + qiime_wrapper.py + summarize_otu_by_cat.py + --mapping_fp=$mapping_fp + --otu_table_fp=$otu_table_fp + --mapping_category=$mapping_category + --output_fp=$output_fp + $normalize_flag + + + + + + + + + + + + + For more information, see summarize_otu_by_cat_ in the Qiime documentation. + +Updated and validated 01/18/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + + .. 
_summarize_otu_by_cat: http://qiime.org/scripts/summarize_otu_by_cat.html + + diff -r 3b1401bf4cb1 -r d1cf2064ddd8 qiime/summarize_taxa.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime/summarize_taxa.xml Wed Jun 06 16:36:37 2012 -0400 @@ -0,0 +1,93 @@ + +Summarize Taxa + + summarize_taxa.py + + + qiime_wrapper.py + #set $levelnums = str($level).split(",") + #set $filestr = "" + #if $len($level.__str__) > 1: + #for $i in $levelnums: + #set $filestr = $filestr + '^\\\\S+_L'+$i+'\\\\.txt$:txt,' + #end for + --galaxy_new_datasets=$filestr + --galaxy_datasetid=$output1.id + --galaxy_new_files_path='$__new_file_path__' + --galaxy_logfile=$output1.__str__ + #else: + --galaxy_datasets='^\S+_L'$level'\.txt$:'$output1 + --galaxy_outputdir='$output1.extra_files_path' + #end if + summarize_taxa.py + --otu_table_fp=$otu_table_fp + --level=$level + #if $mapping != None and $mapping.__str__ != 'None': + --mapping=$mapping + #end if + #if $len($level.__str__) > 1: + --output_dir=$__new_file_path__ + #else: + --output_dir='$output1.extra_files_path' + #end if + $absolute_abundance + #if $lower_percentage > 0.0 and $lower_percentage.__str__ != '': + --lower_percentage=$lower_percentage + #end if + #if $upper_percentage > 0.0 and $upper_percentage.__str__ != '': + --upper_percentage=$upper_percentage + #end if + $transposed_output + #if $delimiter.__str__ != ';': + --delimiter=$delimiter + #end if + + + + + + + + + + + + + + + + + + + + + + + + + +.. class:: warningmark + +Please reload your browser to see all results in your history. + +For more information, see summarize_taxa_ in the Qiime documentation. + +Updated and validated 01/19/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + + .. _summarize_taxa: http://qiime.org/scripts/summarize_taxa.html + +