# HG changeset patch # User azuzolo # Date 1339013856 14400 # Node ID d80000f5ad20b29698030a2178464dfb6b68c0b5 # Parent 2c9714f56480338e9698448496649230d98bfeae Uploaded diff -r 2c9714f56480 -r d80000f5ad20 README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,129 @@ +This was a first attempt at providing galaxy tool_wrappers for the Qiime metagenomics package: +You must first install Qiime: http://qiime.sourceforge.net/install/install.html + + + +Initial tool wrappers were generated by a script searching the qiime scripts (version 1.2.1) for usage info, +and then were hand edited afterwards. + +NOTE: A few of the tool configs worked on the galaxy-central code in April 2011. +I haven't taken time to check them with more recent galaxy releases. + + +I executed the qiime scripts via qiime_wrapper.py +This was to accommodate moving multiple outputs to history items: http://wiki.g2.bx.psu.edu/Admin/Tools/Multiple%20Output%20Files + + +The datatypes file: metagenomics.py has Mothur datatypes with a start at qiime types added at the end. + + + + +The most commonly used qiime scripts are: +- check_id_map.py +- split_libraries.py +- pick_otus_through_otu_table.py +- beta_diversity_through_3d_plots.py +- alpha_rarefaction.py +- jackknifed_beta_diversity.py +- filter_by_metadata.py +- filter_otu_table.py +- merge_otu_tables.py +- merge_mapping_files.py + + +Tool_config development status: +The tool configs with a * indicate that the tool at least displayed in galaxy at least once upon a time. +( Since these were initially auto generated, some may not make sense in a galaxy framework. 
) + + add_taxa.xml + adjust_seq_orientation.xml +* align_seqs.xml +* alpha_diversity.xml metrics - select input/output repeat conditional tree +* alpha_rarefaction.xml +* assign_taxonomy.xmlA assignment_method-select +* beta_diversity.xml +* beta_diversity_through_3d_plots.xml html-plots + beta_significance.xml + blast_wrapper.xml +* check_id_map.xml + collate_alpha.xml +* compare_3d_plots.xml + consensus_tree.xml + convert_otu_table_to_unifrac_sample_mapping.xml + convert_unifrac_sample_mapping_to_otu_table.xml +* denoise.xml +* dissimilarity_mtx_stats.xml + exclude_seqs_by_blast.xml + extract_seqs_by_sample_id.xml +* filter_alignment.xml + filter_by_metadata.xml + filter_fasta.xml + filter_otu_table.xml +* filter_otus_by_sample.xml + fix_arb_fasta.xml + identify_chimeric_seqs.xml +* jackknifed_beta_diversity.xml +* make_2d_plots.xml +* make_3d_plots.xml + make_bootstrapped_tree.xml + make_distance_histograms.xml + make_fastq.xml + make_library_id_lists.xml +* make_otu_heatmap_html.xml +* make_otu_network.xml + make_otu_table.xml + make_per_library_sff.xml + make_phylogeny.xml + make_pie_charts.xml + make_prefs_file.xml + make_qiime_py_file.xml +* make_qiime_rst_file.xml +* make_rarefaction_plots.xml +* make_sra_submission.xml +* merge_denoiser_output.xml + merge_mapping_files.xml + merge_otu_maps.xml + merge_otu_tables.xml + multiple_rarefactions.xml + multiple_rarefactions_even_depth.xml + otu_category_significance.xml +* parallel_align_seqs_pynast.xml + parallel_alpha_diversity.xml +* parallel_assign_taxonomy_blast.xml +* parallel_assign_taxonomy_rdp.xml + parallel_beta_diversity.xml +* parallel_blast.xml + parallel_identify_chimeric_seqs.xml + parallel_multiple_rarefactions.xml +* parallel_pick_otus_blast.xml +* parallel_pick_otus_uclust_ref.xml + per_library_stats.xml +* pick_otus.xml +* pick_otus_through_otu_table.xml + pick_rep_set.xml +* plot_rank_abundance_graph.xml + poller.xml + poller_example.xml + pool_by_metadata.xml + principal_coordinates.xml + 
print_qiime_config.xml +* process_sff.xml +* process_sra_submission.xml +* quality_scores_plot.xml + shared_phylotypes.xml + single_rarefaction.xml + sort_denoiser_output.xml +* split_libraries.xml +* split_libraries_illumina.xml + sra_spreadsheet_to_map_files.xml + start_parallel_jobs.xml + summarize_otu_by_cat.xml + summarize_taxa.xml +* supervised_learning.xml +* transform_coordinate_matrices.xml +* tree_compare.xml + trflp_file_to_otu_table.xml + trim_sff_primers.xml +* truncate_fasta_qual_files.xml + upgma_cluster.xml diff -r 2c9714f56480 -r d80000f5ad20 add_taxa.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/add_taxa.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,31 @@ + + Add taxa to OTU table + + add_taxa.py + + + qiime_wrapper.py + add_taxa.py + --otu_file=$otu_file + --taxonomy_file=$taxonomy_file + --output_file=$output_file + --id_map_file=$id_map_file + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 adjust_seq_orientation.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/adjust_seq_orientation.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,29 @@ + + Get the reverse complement of all sequences + + adjust_seq_orientation.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + adjust_seq_orientation.py + --input_fasta_fp=$input_fasta_fp + --output_fp=$output_fp + $retain_seq_id + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 align_seqs.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/align_seqs.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,93 @@ + + Align sequences using a variety of alignment methods + + align_seqs.py + + + qiime_wrapper.py + --galaxy_outputdir='$log.extra_files_path' + --galaxy_datasets='^\S+_aligned\.\S+$:'$aligned_fasta,'^\S+_log\.txt$:'$log,'^\S+_failures\.fasta$:'$failures + align_seqs.py + --input_fasta_fp=$input_fasta_fp + --alignment_method=$alignment_method + #if $alignment_method.__str__ == 'pynast': + #if $alignment.template_fp != None and 
$alignment.template_fp.__str__ != 'None' and $alignment.template_fp.__str__ != '': + --template_fp=$alignment.template_fp + #end if + --pairwise_alignment_method=$pairwise_alignment_method + --min_length=$min_length + --min_percent_id=$min_percent_id + #if $blast_db != None and $blast_db.__str__ != 'None' and $blast_db.__str__ != '': + --blast_db=$blast_db + #end if + #elif $alignment_method.__str__ == 'infernal': + --template_fp=$alignment.template_fp + #end if + + --output_dir='$log.extra_files_path' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + .. class:: warningmark +Note: MUSCLE alignment is still not verified. Use at your own risk. + +For more information, see align_seqs_ in the Qiime documentation. + +Updated and validated 01/16/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + +.. 
_align_seqs: http://qiime.org/scripts/align_seqs.html + + diff -r 2c9714f56480 -r d80000f5ad20 align_seqs_py.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/align_seqs_py.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,111 @@ + + Align sequences using a variety of alignment methods + + align_seqs.py + + + qiime_wrapper.py + --galaxy_outputdir='$log.extra_files_path' + --galaxy_datasets='^\S+_aligned\.\S+$:'$aligned_fasta,'^\S+_log\.txt$:'$log,'^\S+_failures\.fasta$:'$failures + align_seqs.py + --input_fasta_fp=$input_fasta_fp + --alignment_method=$align.alignment_method + #if $align.alignment_method == 'pynast': + --template_fp=$align.alignment.template_fp + --pairwise_alignment_method=$align.pairwise_alignment_method + --min_length=$align.min_length + --min_percent_id=$align.min_percent_id + --blast_db=$align.blast_db + #elif $align.alignment_method == 'infernal': + --template_fp=$align.alignment.template_fp + #elif $align.alignment_method == 'clustalw': + #elif $align.alignment_method == 'muscle': + #elif $align.alignment_method == 'mafft': + #end if + + --output_dir='$log.extra_files_path' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 alpha_diversity.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alpha_diversity.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,86 @@ + + Calculate alpha diversity on each sample in an otu table, using a variety of alpha diversity metrics + + alpha_diversity.py + + + qiime_wrapper.py + #if $run_type.input_type.__str__ == "multi": + --galaxy_logfile=$output_path + --galaxy_outputdir=$output_path.extra_files_path + #end if + alpha_diversity.py + #if $run_type.input_type.__str__ == "multi": + --input_path=$input_path.extra_files_path + --output_path=$output_path.extra_files_path + #else: + --output_path=$output_path + --input_path=$input_path + #end if + --metrics=$metrics + #if 
$metrics.__str__ == 'PD_whole_tree': + --tree_path=$tree_path + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This tool calculates alpha diversity, or within-sample diversity, using an otu table. Metrics may be selected in any combination. Input can be the log file from multiple_rarefactions (batch alpha diversity), or a single rarefied OTU table (single_rarefaction/single file alpha diversity). When the phylogenetic metric PD_whole_tree is selected, a .tre file must be supplied for the tool to run. The output file is a log file listing all the alpha rarefaction files produced. + +For more information, see alpha_diversity_ in the Qiime documentation. + +Updated and validated 01/16/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + +.. _alpha_diversity: http://qiime.org/scripts/alpha_diversity.html + + diff -r 2c9714f56480 -r d80000f5ad20 alpha_rarefaction.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alpha_rarefaction.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,108 @@ + + A workflow script for performing alpha rarefaction + + alpha_rarefaction.py + + + qiime_wrapper.py + --galaxy_summary_html='$output_html' + --galaxy_outputdir='$output_html.extra_files_path' + --galaxy_summary_template='$output_template' + ## --galaxy_datasets='^rarefaction_plots.html$:'$output_html + alpha_rarefaction.py + --otu_table_fp=$otu_table_fp + --mapping_fp=$mapping_fp + --output_dir=$output_html.extra_files_path + #if $parameter.source == 'hist': + --parameter_fp=$parameter_fp + #else: + --parameter_fp=$parameter_generated + #end if + --num_steps=$num_steps + $force + $print_only + $parallel + #if $tree_fp != None and $tree_fp.__str__ != 'None': + --tree_fp=$tree_fp + #end if + + + + + + + + + + + + + + + + + + + + + + + + +alpha_diversity:metrics 
chao1,observed_species,PD_whole_tree +multiple_rarefactions_even_depth:num-reps 20 +parallel:jobs_to_start 2 +parallel:retain_temp_files False +parallel:seconds_to_sleep 60 +collate_alpha:example_path +make_rarefaction_plots:imagetype png +make_rarefaction_plots:resolution 75 +make_rarefaction_plots:background_color white +make_rarefaction_plots:prefs_path + + + + +rarefaction_plots.html + + +]]> + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 assign_taxonomy.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/assign_taxonomy.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,53 @@ + + Assign taxonomy to each sequence + + assign_taxonomy.py + + + qiime_wrapper.py + --galaxy_outputdir='$outputfile.extra_files_path' + --galaxy_datasets='^\S+\.txt$:'$outputfile + assign_taxonomy.py + --input_fasta_fp=$input_fasta_fp + #if $id_to_taxonomy_fp != None and $id_to_taxonomy_fp.__str__ != 'None' and $id_to_taxonomy_fp.__str__ != '': + --id_to_taxonomy_fp=$id_to_taxonomy_fp + #end if + #if $reference_seqs_fp != None and $reference_seqs_fp.__str__ != 'None' and $reference_seqs_fp.__str__ != '': + --reference_seqs_fp=$reference_seqs_fp + #end if + #if $training_data_properties_fp != None and $training_data_properties_fp.__str__ != 'None' and $training_data_properties_fp.__str__ != '': + --training_data_properties_fp=$training_data_properties_fp + #end if + --confidence=$confidence + --assignment_method=rdp + --output_dir='$outputfile.extra_files_path' + + + + + + + + + + + + + + + Only uses RDP. For blast, use MBAC blast tools. + +For more information, see assign_taxonomy_ in the Qiime documentation. + +Updated and validated 01/16/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + +.. 
_assign_taxonomy: http://qiime.org/scripts/assign_taxonomy.html + + diff -r 2c9714f56480 -r d80000f5ad20 beta_diversity.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/beta_diversity.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,351 @@ + + Calculate beta diversity (pairwise sample dissimilarity) on one or many otu tables + + beta_diversity.py + + + qiime_wrapper.py + --galaxy_outputdir=$__new_file_path__ + #set datasets = [] + #set $path = "" + #if $binary_chisq.__str__ != "None": + #set datasets = $datasets + ["'binary_chisq_.*$:'" + $binary_chisq.__str__] + #if path == "": + #set $path=$binary_chisq.extra_files_path + #end if + #end if + #if $binary_chord.__str__ != "None": + #set datasets = $datasets + ["'binary_chord_.*$:'" + $binary_chord.__str__] + #if path == "": + #set $path=$binary_chord.extra_files_path + #end if + #end if + #if $binary_euclidean.__str__ != "None": + #set datasets = $datasets + ["'binary_euclidean_.*$:'" + $binary_euclidean.__str__] + #if path == "": + #set $path=$binary_euclidean.extra_files_path + #end if + #end if + #if $binary_hamming.__str__ != "None": + #set datasets = $datasets + ["'binary_hamming_.*$:'" + $binary_hamming.__str__] + #if path == "": + #set $path=$binary_hamming.extra_files_path + #end if + #end if + #if $binary_jaccard.__str__ != "None": + #set datasets = $datasets + ["'binary_jaccard_.*$:'" + $binary_jaccard.__str__] + #if path == "": + #set $path=$binary_jaccard.extra_files_path + #end if + #end if + #if $binary_lennon.__str__ != "None": + #set datasets = $datasets + ["'binary_lennon_.*$:'" + $binary_lennon.__str__] + #if path == "": + #set $path=$binary_lennon.extra_files_path + #end if + #end if + #if $binary_ochiai.__str__ != "None": + #set datasets = $datasets + ["'binary_ochiai_.*$:'" + $binary_ochiai.__str__] + #if path == "": + #set $path=$binary_ochiai.extra_files_path + #end if + #end if + #if $binary_pearson.__str__ != "None": + #set datasets = $datasets + ["'binary_pearson_.*$:'" + 
$binary_pearson.__str__] + #if path == "": + #set $path=$binary_pearson.extra_files_path + #end if + #end if + #if $binary_sorensen_dice.__str__ != "None": + #set datasets = $datasets + ["'binary_sorensen_dice_.*$:'" + $binary_sorensen_dice.__str__] + #if path == "": + #set $path=$binary_sorensen_dice.extra_files_path + #end if + #end if + #if $bray_curtis.__str__ != "None": + #set datasets = $datasets + ["'bray_curtis_.*$:'" + $bray_curtis.__str__] + #if path == "": + #set $path=$bray_curtis.extra_files_path + #end if + #end if + #if $canberra.__str__ != "None": + #set datasets = $datasets + ["'canberra_.*$:'" + $canberra.__str__] + #if path == "": + #set $path=$canberra.extra_files_path + #end if + #end if + #if $chisq.__str__ != "None": + #set datasets = $datasets + ["'chisq_.*$:'" + $chisq.__str__] + #if path == "": + #set $path=$chisq.extra_files_path + #end if + #end if + #if $chord.__str__ != "None": + #set datasets = $datasets + ["'chord_.*$:'" + $chord.__str__] + #if path == "": + #set $path=$chord.extra_files_path + #end if + #end if + #if $euclidean.__str__ != "None": + #set datasets = $datasets + ["'euclidean_.*$:'" + $euclidean.__str__] + #if path == "": + #set $path=$euclidean.extra_files_path + #end if + #end if + #if $gower.__str__ != "None": + #set datasets = $datasets + ["'gower_.*$:'" + $gower.__str__] + #if path == "": + #set $path=$gower.extra_files_path + #end if + #end if + #if $hellinger.__str__ != "None": + #set datasets = $datasets + ["'hellinger_.*$:'" + $hellinger.__str__] + #if path == "": + #set $path=$hellinger.extra_files_path + #end if + #end if + #if $kulczynski.__str__ != "None": + #set datasets = $datasets + ["'kulczynski_.*$:'" + $kulczynski.__str__] + #if path == "": + #set $path=$kulczynski.extra_files_path + #end if + #end if + #if $manhattan.__str__ != "None": + #set datasets = $datasets + ["'manhattan_.*$:'" + $manhattan.__str__] + #if path == "": + #set $path=$manhattan.extra_files_path + #end if + #end if + #if 
$morisita_horn.__str__ != "None": + #set datasets = $datasets + ["'morisita_horn_.*$:'" + $morisita_horn.__str__] + #if path == "": + #set $path=$morisita_horn.extra_files_path + #end if + #end if + #if $pearson.__str__ != "None": + #set datasets = $datasets + ["'pearson_.*$:'" + $pearson.__str__] + #if path == "": + #set $path=$pearson.extra_files_path + #end if + #end if + #if $soergel.__str__ != "None": + #set datasets = $datasets + ["'soergel_.*$:'" + $soergel.__str__] + #if path == "": + #set $path=$soergel.extra_files_path + #end if + #end if + #if $spearman_approx.__str__ != "None": + #set datasets = $datasets + ["'spearman_approx_.*$:'" + $spearman_approx.__str__] + #if path == "": + #set $path=$spearman_approx.extra_files_path + #end if + #end if + #if $specprof.__str__ != "None": + #set datasets = $datasets + ["'specprof_.*$:'" + $specprof.__str__] + #if path == "": + #set $path=$specprof.extra_files_path + #end if + #end if + #if $unifrac.__str__ != "None": + #set datasets = $datasets + ["'unifrac_.*$:'" + $unifrac.__str__] + #if path == "": + #set $path=$unifrac.extra_files_path + #end if + #end if + #if $unifrac_g.__str__ != "None": + #set datasets = $datasets + ["'unifrac_g_.*$:'" + $unifrac_g.__str__] + #if path == "": + #set $path=$unifrac_g.extra_files_path + #end if + #end if + #if $unifrac_g_full_tree.__str__ != "None": + #set datasets = $datasets + ["'unifrac_g_full_tree_.*$:'" + $unifrac_g_full_tree.__str__] + #if path == "": + #set $path=$unifrac_g_full_tree.extra_files_path + #end if + #end if + #if $unweighted_unifrac.__str__ != "None": + #set datasets = $datasets + ["'unweighted_unifrac_.*$:'" + $unweighted_unifrac.__str__] + #if path == "": + #set $path=$unweighted_unifrac.extra_files_path + #end if + #end if + #if $unweighted_unifrac_full_tree.__str__ != "None": + #set datasets = $datasets + ["'unweighted_unifrac_full_tree_.*$:'" + $unweighted_unifrac_full_tree.__str__] + #if path == "": + #set 
$path=$unweighted_unifrac_full_tree.extra_files_path + #end if + #end if + #if $weighted_normalized_unifrac.__str__ != "None": + #set datasets = $datasets + ["'weighted_normalized_unifrac_.*$:'" + $weighted_normalized_unifrac.__str__] + #if path == "": + #set $path=$weighted_normalized_unifrac.extra_files_path + #end if + #end if + #if $weighted_unifrac.__str__ != "None": + #set datasets = $datasets + ["'weighted_unifrac_.*$:'" + $weighted_unifrac.__str__] + #if path == "": + #set $path=$weighted_unifrac.extra_files_path + #end if + #end if + --galaxy_datasets=#echo ','.join($datasets) + --galaxy_new_files_path='$path' + beta_diversity.py + --input_path=$input_path + #if $rows.__str__ != '': + --rows=$rows + #end if + --output_dir=$__new_file_path__ + --metrics=$metrics + #if $tree_path.__str__ != "None" and len($tree_path.__str__) > 0: + --tree_path=$tree_path + #end if + $full_tree + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 'binary_chisq' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'binary_chord' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'binary_euclidean' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'binary_hamming' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'binary_jaccard' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'binary_lennon' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'binary_ochiai' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'binary_pearson' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'binary_sorensen_dice' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'bray_curtis' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'canberra' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'chisq' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'chord' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'euclidean' 
in (metrics if isinstance(metrics,list) else [metrics]) + + + 'gower' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'hellinger' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'kulczynski' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'manhattan' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'morisita_horn' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'pearson' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'soergel' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'spearman_approx' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'specprof' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'unifrac' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'unifrac_g' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'unifrac_g_full_tree' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'unweighted_unifrac' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'unweighted_unifrac_full_tree' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'weighted_normalized_unifrac' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'weighted_unifrac' in (metrics if isinstance(metrics,list) else [metrics]) + + + + + For more information, see beta_diversity_ in the Qiime documentation. + +Updated and validated 01/18/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + + .. 
_beta_diversity: http://qiime.org/scripts/beta_diversity.html + + diff -r 2c9714f56480 -r d80000f5ad20 beta_diversity_through_3d_plots.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/beta_diversity_through_3d_plots.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,131 @@ + + A workflow script for computing beta diversity distance matrices and the corresponding 3D plots + + beta_diversity_through_3d_plots.py + + + qiime_wrapper.py + --galaxy_outputdir='$log.extra_files_path' + --galaxy_datasets='^log_\S+\.txt$:'$log,'^prefs\.txt$:'$prefs + beta_diversity_through_3d_plots.py + --otu_table_fp=$otu_table_fp + --mapping_fp=$mapping_fp + --output_dir='$log.extra_files_path' + --force + #if $parameter.source == 'hist': + --parameter_fp=$parameter_fp + #else: + --parameter_fp=$parameter_generated + #end if + --tree_fp=$tree_fp + #if int($seqs_per_sample) > 0: + --seqs_per_sample=$seqs_per_sample + #end if + $parallel + + + + + + + + + + + + + + + + + + + + + + + +beta_diversity:metrics +alpha_diversity:metrics chao1,observed_species,PD_whole_tree +multiple_rarefactions_even_depth:num-reps 20 +parallel:jobs_to_start 2 +parallel:retain_temp_files False +parallel:seconds_to_sleep 60 +collate_alpha:example_path +make_rarefaction_plots:imagetype png +make_rarefaction_plots:resolution 75 +make_rarefaction_plots:background_color white +make_rarefaction_plots:prefs_path + + + + + +rarefaction_plots.html + + +]]> + + + + + + + + {weighted_unifrac}_{dataset_4477.dat} + + + + + + + + +$ beta_diversity_through_3d_plots.py -i wf_da/uclust_picked_otus/rep_set/rdp_assigned_taxonomy/otu_table/seqs_otu_table.txt -m Fasting_Map.txt -o wf_bdiv_even146_test/ -p custom_parameters.tt -t wf_da/uclust_picked_otus/rep_set/pynast_aligned_seqs/fasttree_phylogeny/seqs_rep_set.tre -e 146 --print_only + +single_rarefaction.py -i wf_da/uclust_picked_otus/rep_set/rdp_assigned_taxonomy/otu_table/seqs_otu_table.txt -o wf_bdiv_even146_test//seqs_otu_table_even146.txt -d 146 +make_prefs_file.py -m 
Fasting_Map.txt -o wf_bdiv_even146_test//prefs.txt --monte_carlo_dists 10 --background_color black --mapping_headers_to_use Treatment,DOB +beta_diversity.py -i wf_bdiv_even146_test//seqs_otu_table_even146.txt -o wf_bdiv_even146_test/ --metrics weighted_unifrac -t wf_da/uclust_picked_otus/rep_set/pynast_aligned_seqs/fasttree_phylogeny/seqs_rep_set.tre +principal_coordinates.py -i wf_bdiv_even146_test//weighted_unifrac_seqs_otu_table_even146.txt -o wf_bdiv_even146_test//weighted_unifrac_pc.txt +make_3d_plots.py -p wf_bdiv_even146_test//prefs.txt -i wf_bdiv_even146_test//weighted_unifrac_pc.txt -o wf_bdiv_even146_test//weighted_unifrac_3d_continuous/ -m Fasting_Map.txt --ellipsoid_smoothness 1 +make_3d_plots.py -b "SampleID,BarcodeSequence,LinkerPrimerSequence,Treatment,DOB,Description" -i wf_bdiv_even146_test//weighted_unifrac_pc.txt -o wf_bdiv_even146_test//weighted_unifrac_3d_discrete/ -m Fasting_Map.txt --ellipsoid_smoothness 1 +beta_diversity.py -i wf_bdiv_even146_test//seqs_otu_table_even146.txt -o wf_bdiv_even146_test/ --metrics unweighted_unifrac -t wf_da/uclust_picked_otus/rep_set/pynast_aligned_seqs/fasttree_phylogeny/seqs_rep_set.tre +principal_coordinates.py -i wf_bdiv_even146_test//unweighted_unifrac_seqs_otu_table_even146.txt -o wf_bdiv_even146_test//unweighted_unifrac_pc.txt +make_3d_plots.py -p wf_bdiv_even146_test//prefs.txt -i wf_bdiv_even146_test//unweighted_unifrac_pc.txt -o wf_bdiv_even146_test//unweighted_unifrac_3d_continuous/ -m Fasting_Map.txt --ellipsoid_smoothness 1 +make_3d_plots.py -b "SampleID,BarcodeSequence,LinkerPrimerSequence,Treatment,DOB,Description" -i wf_bdiv_even146_test//unweighted_unifrac_pc.txt -o wf_bdiv_even146_test//unweighted_unifrac_3d_discrete/ -m Fasting_Map.txt --ellipsoid_smoothness 1 + + + + + diff -r 2c9714f56480 -r d80000f5ad20 beta_significance.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/beta_significance.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,35 @@ + + This script runs any of a set of common 
tests to determine if a sample is statistically significantly different from another sample + + beta_significance.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + beta_significance.py + --input_path=$input_path + --output_path=$output_path + --significance_test=$significance_test + --tree_path=$tree_path + --num_iters=$num_iters + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 blast_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/blast_wrapper.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,31 @@ + + Blast Interface + + blast_wrapper.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + blast_wrapper.py + --input_fasta_fp=$input_fasta_fp + --refseqs_fp=$refseqs_fp + --num_seqs_per_blast_run=$num_seqs_per_blast_run + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 categorized_dist_scatterplot.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/categorized_dist_scatterplot.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,41 @@ + + makes a figure representing average distances between samples, broken down by categories. 
I call it a 'categorized distance scatterplot' + + categorized_dist_scatterplot.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + categorized_dist_scatterplot.py + --map=$map + --distance_matrix=$distance_matrix + --primary_state=$primary_state + --axis_category=$axis_category + --output_path=$output_path + --colorby=$colorby + --secondary_state=$secondary_state + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 check_id_map.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/check_id_map.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,52 @@ + + Checks user's metadata mapping file for required data, valid format + + check_id_map.py + + + qiime_wrapper.py + --galaxy_outputdir='$log.extra_files_path' + --galaxy_datasets='^\S+_corrected\.txt$:'$corrected_mapping,'^\S+\.log:'$log + check_id_map.py + --map=$map + --output_dir='$log.extra_files_path' + --char_replace=$char_replace + $not_barcoded + $variable_len_barcodes + $disable_primer_check + $verbose + #if $added_demultiplex_field != None and $added_demultiplex_field.__str__ != "": + --added_demultiplex_field=$added_demultiplex_field + #end if + + + + + + + + + + + + + + + + + For more information, see check_id_map_ in the Qiime documentation. + +Updated and validated 01/19/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + + .. 
_check_id_map: http://qiime.org/scripts/check_id_map.html + + diff -r 2c9714f56480 -r d80000f5ad20 collate_alpha.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/collate_alpha.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,38 @@ + + Collate alpha diversity results + + collate_alpha.py + + + qiime_wrapper.py + --galaxy_outputdir=$output1.extra_files_path + --galaxy_new_files_path='$__new_file_path__' + --galaxy_logfile=$output1 + --galaxy_new_datasets='^\S+\.txt$:txt' + --galaxy_datasetid=$output1.id + collate_alpha.py + --input_path=$input_path.extra_files_path + --output_path='$__new_file_path__' + + + + + + + + + + + This tool concatenates all the files generated by alpha_diversity in order to generate rarefaction curves. The input is therefore the log file generated by alpha_diversity, and the output is a log file listing all the output files, as well as the files themselves. Galaxy must be manually refreshed after running this tool to view all output files. + +For more information, see collate_alpha_ in the Qiime documentation. + +Updated and validated 01/16/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + +.. 
_collate_alpha: http://qiime.org/scripts/collate_alpha.html + + diff -r 2c9714f56480 -r d80000f5ad20 compare_3d_plots.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/compare_3d_plots.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,47 @@ + + Plot several PCoA files on the same 3D plot + + compare_3d_plots.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + compare_3d_plots.py + --coord_fnames=$coord_fnames + --map_fname=$map_fname + --colorby=$colorby + --custom_axes=$custom_axes + --prefs_path=$prefs_path + --background_color=$background_color + --edges_file=$edges_file + $serial + --output_dir=$__new_file_path__ + + + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 consensus_tree.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/consensus_tree.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,29 @@ + + This script outputs a majority consensus tree given a collection of input trees. + + consensus_tree.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + consensus_tree.py + --input_dir=$input_dir + --output_fname=$output_fname + $strict + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 convert_otu_table_to_unifrac_sample_mapping.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/convert_otu_table_to_unifrac_sample_mapping.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,26 @@ + + Convert a QIIME OTU table to a UniFrac sample mapping file + + convert_otu_table_to_unifrac_sample_mapping.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + convert_otu_table_to_unifrac_sample_mapping.py + --otu_table_fp=$otu_table_fp + --output_fp=$output_fp + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 convert_unifrac_sample_mapping_to_otu_table.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/convert_unifrac_sample_mapping_to_otu_table.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,26 @@ + + Convert a UniFrac sample mapping file to an OTU table + + 
convert_unifrac_sample_mapping_to_otu_table.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + convert_unifrac_sample_mapping_to_otu_table.py + --sample_mapping_fp=$sample_mapping_fp + --output_fp=$output_fp + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 denoise.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/denoise.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,53 @@ + + Denoise a flowgram file + + denoise.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + denoise.py + --input_file=$input_file + --fasta_file=$fasta_file + --output_dir=$__new_file_path__ + --method=$method + $keep_intermediates + --cut-off=$cut_off + --precision=$precision + --num_cpus=$num_cpus + $force_overwrite + --map_fname=$map_fname + --primer=$primer + + + + + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 dissimilarity_mtx_stats.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dissimilarity_mtx_stats.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,26 @@ + + Calculate mean, median and standard deviation from a set of distance matrices + + dissimilarity_mtx_stats.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + dissimilarity_mtx_stats.py + --input_dir=$input_dir + --output_dir=$__new_file_path__ + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 exclude_seqs_by_blast.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/exclude_seqs_by_blast.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,53 @@ + + Exclude contaminated sequences using BLAST + + exclude_seqs_by_blast.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + exclude_seqs_by_blast.py + --querydb=$querydb + --subjectdb=$subjectdb + --outputfilename=$outputfilename + --e_value=$e_value + --percent_aligned=$percent_aligned + $no_clean + --blastmatroot=$blastmatroot + --working_dir=$working_dir + --max_hits=$max_hits + --word_size=$word_size + $no_format_db + + + + + + + + + + + + + + + + + + + + + + + diff 
-r 2c9714f56480 -r d80000f5ad20 extract_seqs_by_sample_id.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extract_seqs_by_sample_id.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,32 @@ + + Extract sequences based on the SampleID + + extract_seqs_by_sample_id.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + extract_seqs_by_sample_id.py + --input_fasta_fp=$input_fasta_fp + --sample_ids=$sample_ids + --output_fasta_fp=$output_fasta_fp + $negate + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 filter_alignment.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter_alignment.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,51 @@ + + Filter sequence alignment by removing highly variable regions + + filter_alignment.py + + + qiime_wrapper.py + ## --galaxy_tmpdir='$__new_file_path__' + --galaxy_outputdir='$pfiltered_fasta.extra_files_path' + --galaxy_datasets='^\S+_pfiltered\.\S+$:'$pfiltered_fasta + filter_alignment.py + --input_fasta_file=$input_fasta_file + --output_dir='$pfiltered_fasta.extra_files_path' + --lane_mask_fp=$lane_mask_fp + $suppress_lane_mask_filter + --allowed_gap_frac=$allowed_gap_frac + $remove_outliers + --threshold=$threshold + #if $entropy_threshold != 0.0: + --entropy_threshold=$entropy_threshold + #end if + + + + + + + + + + + + + + + + For more information, see filter_alignment_ in the Qiime documentation. + +Updated and validated 01/16/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + +.. 
_filter_alignment: http://qiime.org/scripts/filter_alignment.html + + diff -r 2c9714f56480 -r d80000f5ad20 filter_by_metadata.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter_by_metadata.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,37 @@ + + Filter OTU table by removal of specified metadata + + filter_by_metadata.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + filter_by_metadata.py + --otu_table_fp=$otu_table_fp + --map=$map + --states=$states + --otu_outfile=$otu_outfile + --map_outfile=$map_outfile + --num_seqs_per_otu=$num_seqs_per_otu + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 filter_fasta.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter_fasta.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,41 @@ + + This script can be applied to remove sequences from a fasta file based on input criteria. + + filter_fasta.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + filter_fasta.py + --input_fasta_fp=$input_fasta_fp + --output_fasta_fp=$output_fasta_fp + --otu_map=$otu_map + --seq_id_fp=$seq_id_fp + --subject_fasta_fp=$subject_fasta_fp + --seq_id_prefix=$seq_id_prefix + $negate + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 filter_otu_table.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter_otu_table.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,43 @@ + + Filters OTU table by minimum OTU count and number of samples or by taxonomy + + filter_otu_table.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + filter_otu_table.py + --otu_table_fp=$otu_table_fp + --min_count=$min_count + --min_samples=$min_samples + --include_taxonomy=$include_taxonomy + --exclude_taxonomy=$exclude_taxonomy + --dir_path=$dir_path + --seqs_per_sample=$seqs_per_sample + + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 filter_otus_by_sample.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter_otus_by_sample.xml Wed Jun 06 
16:17:36 2012 -0400 @@ -0,0 +1,32 @@ + + Filter OTU mapping file and sequences by SampleIDs + + filter_otus_by_sample.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + filter_otus_by_sample.py + --otu_map_fp=$otu_map_fp + --input_fasta_fp=$input_fasta_fp + --samples_to_extract=$samples_to_extract + --output_dir=$__new_file_path__ + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 fix_arb_fasta.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fix_arb_fasta.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,25 @@ + + Reformat ARB FASTA files + + fix_arb_fasta.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + fix_arb_fasta.py + --input_fasta_fp=$input_fasta_fp + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 identify_chimeric_seqs.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/identify_chimeric_seqs.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,73 @@ + + Identify chimeric sequences in input FASTA file + + identify_chimeric_seqs.py + + + qiime_wrapper.py + identify_chimeric_seqs.py + --input_fasta_fp=$input_fasta_fp + #if $pick.chimera_detection_method == 'ChimeraSlayer': + --chimera_detection_method=$pick.chimera_detection_method + --aligned_reference_seqs_fp=$pick.aligned_reference_seqs_fp + #if $pick.min_div_ratio.__str__ != '0.0': + --min_div_ratio=$pick.min_div_ratio + #end if + #elif $pick.chimera_detection_method == 'blast_fragments': + --chimera_detection_method=$pick.chimera_detection_method + --id_to_taxonomy_fp=$pick.id_to_taxonomy_fp + #if $pick.blast_db != None and $pick.blast_db.__str__ != "": + --blast_db=$pick.blast_db + #else: + --reference_seqs_fp=$pick.reference_seqs_fp + #end if + --num_fragments=$pick.num_fragments + --taxonomy_depth=$pick.taxonomy_depth + --max_e_value=$pick.max_e_value + #end if + --output_fp=$output_fp + + + + + + + + + + + + + + + + + + + + + + + + + + + + For more information, see identify_chimeric_seqs_ in the Qiime documentation. 
+ +Updated and validated 01/19/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + +.. _identify_chimeric_seqs: http://qiime.org/scripts/identify_chimeric_seqs.html + + diff -r 2c9714f56480 -r d80000f5ad20 jackknifed_beta_diversity.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/jackknifed_beta_diversity.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,101 @@ + + A workflow script for performing jackknifed UPGMA clustering and build jackknifed 2d and 3D PCoA plots. + + jackknifed_beta_diversity.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + jackknifed_beta_diversity.py + --otu_table_fp=$otu_table_fp + --output_dir=$__new_file_path__ + $force + #if $parameter.source == 'hist': + --parameter_fp=$parameter_fp + #else: + --parameter_fp=$parameter_generated + #end if + --seqs_per_sample=$seqs_per_sample + --mapping_fp=$mapping_fp + --tree_fp=$tree_fp + --master_tree=$master_tree + $print_only + $parallel + + + + + + + + + + + + + + + + + + + + + + + + + + + + +alpha_diversity:metrics chao1,observed_species,PD_whole_tree +multiple_rarefactions_even_depth:num-reps 20 +parallel:jobs_to_start 2 +parallel:retain_temp_files False +parallel:seconds_to_sleep 60 +collate_alpha:example_path +make_rarefaction_plots:imagetype png +make_rarefaction_plots:resolution 75 +make_rarefaction_plots:background_color white +make_rarefaction_plots:prefs_path + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 lib/.DS_Store Binary file lib/.DS_Store has changed diff -r 2c9714f56480 -r d80000f5ad20 lib/._.DS_Store Binary file lib/._.DS_Store has changed diff -r 2c9714f56480 -r d80000f5ad20 lib/._galaxy Binary file lib/._galaxy has changed diff -r 2c9714f56480 -r d80000f5ad20 lib/galaxy/.DS_Store Binary file lib/galaxy/.DS_Store has changed diff -r 2c9714f56480 -r d80000f5ad20 lib/galaxy/._.DS_Store Binary 
file lib/galaxy/._.DS_Store has changed diff -r 2c9714f56480 -r d80000f5ad20 lib/galaxy/._datatypes Binary file lib/galaxy/._datatypes has changed diff -r 2c9714f56480 -r d80000f5ad20 lib/galaxy/datatypes/._metagenomics.py Binary file lib/galaxy/datatypes/._metagenomics.py has changed diff -r 2c9714f56480 -r d80000f5ad20 lib/galaxy/datatypes/metagenomics.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/galaxy/datatypes/metagenomics.py Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,1121 @@ +""" +metagenomics datatypes +James E Johnson - University of Minnesota +for Mothur +""" + +import data +import logging, os, sys, time, tempfile, shutil, string, glob, re +import galaxy.model +from galaxy.datatypes import metadata +from galaxy.datatypes import tabular +from galaxy.datatypes import sequence +from galaxy.datatypes.metadata import MetadataElement +from galaxy.datatypes.tabular import Tabular +from galaxy.datatypes.sequence import Fasta +from galaxy import util +from galaxy.datatypes.images import Html +from sniff import * + +log = logging.getLogger(__name__) + + +## Mothur Classes + +class Otu( Tabular ): + file_ext = 'otu' + + def sniff( self, filename ): + """ + Determines whether the file is a otu (operational taxonomic unit) format + """ + try: + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + if line: + if line[0] != '@': + linePieces = line.split('\t') + if len(linePieces) < 2: + return False + try: + check = int(linePieces[1]) + if check + 2 != len(linePieces): + return False + except ValueError: + return False + count += 1 + if count == 5: + return True + fh.close() + if count < 5 and count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class OtuList( Otu ): + file_ext = 'list' + +class Sabund( Otu ): + file_ext = 'sabund' + + def sniff( self, filename ): + """ + Determines whether the file is a otu (operational taxonomic unit) format + 
labelcount[value(1..n)] + + """ + try: + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + if line: + if line[0] != '@': + linePieces = line.split('\t') + if len(linePieces) < 2: + return False + try: + check = int(linePieces[1]) + if check + 2 != len(linePieces): + return False + for i in range( 2, len(linePieces)): + ival = int(linePieces[i]) + except ValueError: + return False + count += 1 + if count >= 5: + return True + fh.close() + if count < 5 and count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class Rabund( Sabund ): + file_ext = 'rabund' + +class GroupAbund( Otu ): + file_ext = 'grpabund' + def init_meta( self, dataset, copy_from=None ): + Otu.init_meta( self, dataset, copy_from=copy_from ) + def set_meta( self, dataset, overwrite = True, skip=1, max_data_lines = 100000, **kwd ): + # See if file starts with header line + if dataset.has_data(): + try: + fh = open( dataset.file_name ) + line = fh.readline() + line = line.strip() + linePieces = line.split('\t') + if linePieces[0] == 'label' and linePieces[1] == 'Group': + skip=1 + else: + skip=0 + finally: + fh.close() + Otu.set_meta( self, dataset, overwrite, skip, max_data_lines, **kwd) + def sniff( self, filename, vals_are_int=False): + """ + Determines whether the file is a otu (operational taxonomic unit) Shared format + labelgroupcount[value(1..n)] + The first line is column headings as of Mothur v 1.20 + """ + log.info( "sniff GroupAbund vals_are_int %s" % vals_are_int) + try: + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + if line: + if line[0] != '@': + linePieces = line.split('\t') + if len(linePieces) < 3: + return False + if count > 0 or linePieces[0] != 'label': + try: + check = int(linePieces[2]) + if check + 3 != len(linePieces): + return False + for i in range( 3, len(linePieces)): + if vals_are_int: + ival = 
int(linePieces[i]) + else: + fval = float(linePieces[i]) + except ValueError: + return False + count += 1 + if count >= 5: + return True + fh.close() + if count < 5 and count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class SharedRabund( GroupAbund ): + file_ext = 'shared' + + + def sniff( self, filename ): + """ + Determines whether the file is a otu (operational taxonomic unit) Shared format + labelgroupcount[value(1..n)] + The first line is column headings as of Mothur v 1.20 + """ + # return GroupAbund.sniff(self,filename,True) + isme = GroupAbund.sniff(self,filename,True) + log.info( "is SharedRabund %s" % isme) + return isme + + +class RelAbund( GroupAbund ): + file_ext = 'relabund' + + def sniff( self, filename ): + """ + Determines whether the file is a otu (operational taxonomic unit) Relative Abundance format + labelgroupcount[value(1..n)] + The first line is column headings as of Mothur v 1.20 + """ + # return GroupAbund.sniff(self,filename,False) + isme = GroupAbund.sniff(self,filename,False) + log.info( "is RelAbund %s" % isme) + return isme + +class SecondaryStructureMap(Tabular): + file_ext = 'map' + def __init__(self, **kwd): + """Initialize secondary structure map datatype""" + Tabular.__init__( self, **kwd ) + self.column_names = ['Map'] + + def sniff( self, filename ): + """ + Determines whether the file is a secondary structure map format + A single column with an integer value which indicates the row that this row maps to. + check you make sure is structMap[10] = 380 then structMap[380] = 10. 
+ """ + try: + fh = open( filename ) + line_num = 0 + rowidxmap = {} + while True: + line = fh.readline() + line_num += 1 + line = line.strip() + if not line: + break #EOF + if line: + try: + pointer = int(line) + if pointer > 0: + if pointer > line_num: + rowidxmap[line_num] = pointer + elif pointer < line_num and rowidxmap.get(pointer) != line_num: + return False + except ValueError: + return False + fh.close() + if line_num > 1: #at least one row was read and validated + return True + except: + pass + finally: + fh.close() + return False + +class SequenceAlignment( Fasta ): + file_ext = 'align' + def __init__(self, **kwd): + """Initialize SequenceAlignment datatype""" + Fasta.__init__( self, **kwd ) + + def sniff( self, filename ): + """ + Determines whether the file is in Mothur align fasta format + Each sequence line must be the same length + """ + + try: + fh = open( filename ) + seq_len = -1 + while True: + line = fh.readline() + if not line: + break #EOF + line = line.strip() + if line: #first non-empty line + if line.startswith( '>' ): + #The next line.strip() must not be '', nor startwith '>' + line = fh.readline().strip() + if line == '' or line.startswith( '>' ): + break + if seq_len < 0: + seq_len = len(line) + elif seq_len != len(line): + return False + else: + break #we found a non-empty line, but its not a fasta header + if seq_len > 0: + return True + except: + pass + finally: + fh.close() + return False + +class AlignCheck( Tabular ): + file_ext = 'align.check' + def __init__(self, **kwd): + """Initialize AlignCheck datatype""" + Tabular.__init__( self, **kwd ) + self.column_names = ['name','pound','dash','plus','equal','loop','tilde','total'] + self.column_types = ['str','int','int','int','int','int','int','int'] + self.comment_lines = 1 + + def set_meta( self, dataset, overwrite = True, **kwd ): + # Tabular.set_meta( self, dataset, overwrite = overwrite, first_line_is_header = True, skip = 1 ) + data_lines = 0 + if dataset.has_data(): + dataset_fh = open( dataset.file_name ) + while True: + line =
dataset_fh.readline() + if not line: break + data_lines += 1 + dataset_fh.close() + dataset.metadata.comment_lines = 1 + dataset.metadata.data_lines = data_lines - 1 if data_lines > 0 else 0 + dataset.metadata.column_names = self.column_names + dataset.metadata.column_types = self.column_types + +class AlignReport(Tabular): + """ +QueryName QueryLength TemplateName TemplateLength SearchMethod SearchScore AlignmentMethod QueryStart QueryEnd TemplateStart TemplateEnd PairwiseAlignmentLength GapsInQuery GapsInTemplate LongestInsert SimBtwnQuery&Template +AY457915 501 82283 1525 kmer 89.07 needleman 5 501 1 499 499 2 0 0 97.6 + """ + file_ext = 'align.report' + def __init__(self, **kwd): + """Initialize AlignCheck datatype""" + Tabular.__init__( self, **kwd ) + self.column_names = ['QueryName','QueryLength','TemplateName','TemplateLength','SearchMethod','SearchScore', + 'AlignmentMethod','QueryStart','QueryEnd','TemplateStart','TemplateEnd', + 'PairwiseAlignmentLength','GapsInQuery','GapsInTemplate','LongestInsert','SimBtwnQuery&Template' + ] + +class BellerophonChimera( Tabular ): + file_ext = 'bellerophon.chimera' + def __init__(self, **kwd): + """Initialize AlignCheck datatype""" + Tabular.__init__( self, **kwd ) + self.column_names = ['Name','Score','Left','Right'] + +class SecondaryStructureMatch(Tabular): + """ + name pound dash plus equal loop tilde total + 9_1_12 42 68 8 28 275 420 872 + 9_1_14 36 68 6 26 266 422 851 + 9_1_15 44 68 8 28 276 418 873 + 9_1_16 34 72 6 30 267 430 860 + 9_1_18 46 80 2 36 261 + """ + def __init__(self, **kwd): + """Initialize SecondaryStructureMatch datatype""" + Tabular.__init__( self, **kwd ) + self.column_names = ['name','pound','dash','plus','equal','loop','tilde','total'] + +class DistanceMatrix(data.Text): + file_ext = 'dist' + """Add metadata elements""" + MetadataElement( name="sequence_count", default=0, desc="Number of sequences", readonly=False, optional=True, no_value=0 ) + + +class 
LowerTriangleDistanceMatrix(DistanceMatrix): + file_ext = 'lower.dist' + def __init__(self, **kwd): + """Initialize secondary structure map datatype""" + DistanceMatrix.__init__( self, **kwd ) + + def sniff( self, filename ): + """ + Determines whether the file is a lower-triangle distance matrix (phylip) format + The first line has the number of sequences in the matrix. + The remaining lines have the sequence name followed by a list of distances from all preceeding sequences + 5 + U68589 + U68590 0.3371 + U68591 0.3609 0.3782 + U68592 0.4155 0.3197 0.4148 + U68593 0.2872 0.1690 0.3361 0.2842 + """ + try: + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + if line: + if line[0] != '@': + linePieces = line.split('\t') + if len(linePieces) != 3: + return False + try: + check = float(linePieces[2]) + except ValueError: + return False + count += 1 + if count == 5: + return True + fh.close() + if count < 5 and count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class SquareDistanceMatrix(DistanceMatrix,Tabular): + file_ext = 'square.dist' + sequence_count = -1 + + def __init__(self, **kwd): + """Initialize secondary structure map datatype""" + Tabular.__init__( self, **kwd ) + def init_meta( self, dataset, copy_from=None ): + data.Text.init_meta( self, dataset, copy_from=copy_from ) + def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): + dataset.metadata.sequences = 0 + + def sniff( self, filename ): + """ + Determines whether the file is a square distance matrix (Column-formatted distance matrix) format + The first line has the number of sequences in the matrix. + The following lines have the sequence name in the first column plus a column for the distance to each sequence + in the row order in which they appear in the matrix. 
+ 3 + U68589 0.0000 0.3371 0.3610 + U68590 0.3371 0.0000 0.3783 + U68591 0.3610 0.3783 0.0000 + """ + try: + fh = open( filename ) + count = 0 + line = fh.readline() + line = line.strip() + sequence_count = int(line) + col_cnt = sequence_count + 1 + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + if line: + if line[0] != '@': + linePieces = line.split('\t') + if len(linePieces) != col_cnt : + return False + try: + for i in range(1, col_cnt): + check = float(linePieces[i]) + except ValueError: + return False + count += 1 + if count == 5: + return True + fh.close() + if count < 5 and count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class PairwiseDistanceMatrix(DistanceMatrix,Tabular): + file_ext = 'pair.dist' + def __init__(self, **kwd): + """Initialize pairwise distance matrix datatype""" + Tabular.__init__( self, **kwd ) + self.column_names = ['Sequence','Sequence','Distance'] + self.column_types = ['str','str','float'] + self.comment_lines = 1 + + def sniff( self, filename ): + """ + Determines whether the file is a pairwise distance matrix (Column-formatted distance matrix) format + The first and second columns have the sequence names and the third column is the distance between those sequences.
+ """ + try: + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + if line: + if line[0] != '@': + linePieces = line.split('\t') + if len(linePieces) != 3: + return False + try: + check = float(linePieces[2]) + except ValueError: + return False + count += 1 + if count == 5: + return True + fh.close() + if count < 5 and count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class AlignCheck(Tabular): + file_ext = 'align.check' + def __init__(self, **kwd): + """Initialize secondary structure map datatype""" + Tabular.__init__( self, **kwd ) + self.column_names = ['name','pound','dash','plus','equal','loop','tilde','total'] + self.columns = 8 + +class Names(Tabular): + file_ext = 'names' + def __init__(self, **kwd): + """Name file shows the relationship between a representative sequence(col 1) and the sequences(comma-separated) it represents(col 2)""" + Tabular.__init__( self, **kwd ) + self.column_names = ['name','representatives'] + self.columns = 2 + +class Summary(Tabular): + file_ext = 'summary' + def __init__(self, **kwd): + """summarizes the quality of sequences in an unaligned or aligned fasta-formatted sequence file""" + Tabular.__init__( self, **kwd ) + self.column_names = ['seqname','start','end','nbases','ambigs','polymer'] + self.columns = 6 + +class Group(Tabular): + file_ext = 'groups' + def __init__(self, **kwd): + """Name file shows the relationship between a representative sequence(col 1) and the sequences it represents(col 2)""" + Tabular.__init__( self, **kwd ) + self.column_names = ['name','group'] + self.columns = 2 + +class Design(Tabular): + file_ext = 'design' + def __init__(self, **kwd): + """Name file shows the relationship between a group(col 1) and a grouping (col 2), providing a way to merge groups.""" + Tabular.__init__( self, **kwd ) + self.column_names = ['group','grouping'] + self.columns = 2 + +class AccNos(Tabular): + file_ext = 
'accnos' + def __init__(self, **kwd): + """A list of names""" + Tabular.__init__( self, **kwd ) + self.column_names = ['name'] + self.columns = 1 + +class Oligos( data.Text ): + file_ext = 'oligos' + + def sniff( self, filename ): + """ + Determines whether the file is a otu (operational taxonomic unit) format + """ + try: + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + else: + if line[0] != '#': + linePieces = line.split('\t') + if len(linePieces) == 2 and re.match('forward|reverse',linePieces[0]): + count += 1 + continue + elif len(linePieces) == 3 and re.match('barcode',linePieces[0]): + count += 1 + continue + else: + return False + if count > 20: + return True + if count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class Frequency(Tabular): + file_ext = 'freq' + def __init__(self, **kwd): + """A list of names""" + Tabular.__init__( self, **kwd ) + self.column_names = ['position','frequency'] + self.column_types = ['int','float'] + + def sniff( self, filename ): + """ + Determines whether the file is a frequency tabular format for chimera analysis + #1.14.0 + 0 0.000 + 1 0.000 + ... 
+ 155 0.975 + """ + try: + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + else: + if line[0] != '#': + try: + linePieces = line.split('\t') + i = int(linePieces[0]) + f = float(linePieces[1]) + count += 1 + continue + except: + return False + if count > 20: + return True + if count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class Quantile(Tabular): + file_ext = 'quan' + MetadataElement( name="filtered", default=False, no_value=False, optional=True , desc="Quantiles calculated using a mask", readonly=True) + MetadataElement( name="masked", default=False, no_value=False, optional=True , desc="Quantiles calculated using a frequency filter", readonly=True) + def __init__(self, **kwd): + """Quantiles for chimera analysis""" + Tabular.__init__( self, **kwd ) + self.column_names = ['num','ten','twentyfive','fifty','seventyfive','ninetyfive','ninetynine'] + self.column_types = ['int','float','float','float','float','float','float'] + def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): + log.info( "Mothur Quantile set_meta %s" % kwd) + def sniff( self, filename ): + """ + Determines whether the file is a quantiles tabular format for chimera analysis + 1 0 0 0 0 0 0 + 2 0.309198 0.309198 0.37161 0.37161 0.37161 0.37161 + 3 0.510982 0.563213 0.693529 0.858939 1.07442 1.20608 + ... 
+ """ + try: + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + else: + if line[0] != '#': + try: + linePieces = line.split('\t') + i = int(linePieces[0]) + f = float(linePieces[1]) + f = float(linePieces[2]) + f = float(linePieces[3]) + f = float(linePieces[4]) + f = float(linePieces[5]) + f = float(linePieces[6]) + count += 1 + continue + except: + return False + if count > 10: + return True + if count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class FilteredQuantile(Quantile): + file_ext = 'filtered.quan' + def __init__(self, **kwd): + """Quantiles for chimera analysis""" + Quantile.__init__( self, **kwd ) + self.filtered = True + +class MaskedQuantile(Quantile): + file_ext = 'masked.quan' + def __init__(self, **kwd): + """Quantiles for chimera analysis""" + Quantile.__init__( self, **kwd ) + self.masked = True + self.filtered = False + +class FilteredMaskedQuantile(Quantile): + file_ext = 'filtered.masked.quan' + def __init__(self, **kwd): + """Quantiles for chimera analysis""" + Quantile.__init__( self, **kwd ) + self.masked = True + self.filtered = True + +class LaneMask(data.Text): + file_ext = 'filter' + + def sniff( self, filename ): + """ + Determines whether the file is a lane mask filter: 1 line consisting of zeros and ones. 
+ """ + try: + fh = open( filename ) + while True: + buff = fh.read(1000) + if not buff: + break #EOF + else: + if not re.match('^[01]+$',buff): + return False + return True + except: + pass + finally: + fh.close() + return False + +class SequenceTaxonomy(Tabular): + file_ext = 'seq.taxonomy' + """ + A table with 2 columns: + - SequenceName + - Taxonomy (semicolon-separated taxonomy in descending order) + Example: + X56533.1 Eukaryota;Alveolata;Ciliophora;Intramacronucleata;Oligohymenophorea;Hymenostomatida;Tetrahymenina;Glaucomidae;Glaucoma; + X97975.1 Eukaryota;Parabasalidea;Trichomonada;Trichomonadida;unclassified_Trichomonadida; + AF052717.1 Eukaryota;Parabasalidea; + """ + def __init__(self, **kwd): + Tabular.__init__( self, **kwd ) + self.column_names = ['name','taxonomy'] + + def sniff( self, filename ): + """ + Determines whether the file is a SequenceTaxonomy + """ + try: + pat = '^([^ \t\n\r\f\v;]+([(]\d+[)])?[;])+$' + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + if not line: + break #EOF + line = line.strip() + if line: + fields = line.split('\t') + if len(fields) != 2: + return False + if not re.match(pat,fields[1]): + return False + count += 1 + if count > 10: + break + if count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class RDPSequenceTaxonomy(SequenceTaxonomy): + file_ext = 'rdp.taxonomy' + """ + A table with 2 columns: + - SequenceName + - Taxonomy (semicolon-separated taxonomy in descending order, RDP requires exactly 6 levels deep) + Example: + AB001518.1 Bacteria;Bacteroidetes;Sphingobacteria;Sphingobacteriales;unclassified_Sphingobacteriales; + AB001724.1 Bacteria;Cyanobacteria;Cyanobacteria;Family_II;GpIIa; + AB001774.1 Bacteria;Chlamydiae;Chlamydiae;Chlamydiales;Chlamydiaceae;Chlamydophila; + """ + def sniff( self, filename ): + """ + Determines whether the file is a SequenceTaxonomy + """ + try: + pat = '^([^ \t\n\r\f\v;]+([(]\d+[)])?[;]){6}$' + fh = open( filename ) + count = 0
+ while True: + line = fh.readline() + if not line: + break #EOF + line = line.strip() + if line: + fields = line.split('\t') + if len(fields) != 2: + return False + if not re.match(pat,fields[1]): + return False + count += 1 + if count > 10: + break + if count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class ConsensusTaxonomy(Tabular): + file_ext = 'cons.taxonomy' + def __init__(self, **kwd): + """A list of names""" + Tabular.__init__( self, **kwd ) + self.column_names = ['OTU','count','taxonomy'] + +class TaxonomySummary(Tabular): + file_ext = 'tax.summary' + def __init__(self, **kwd): + """A Summary of taxon classification""" + Tabular.__init__( self, **kwd ) + self.column_names = ['taxlevel','rankID','taxon','daughterlevels','total'] + +class Phylip(data.Text): + file_ext = 'phy' + + def sniff( self, filename ): + """ + Determines whether the file is in Phylip format (Interleaved or Sequential) + The first line of the input file contains the number of species and the + number of characters, in free format, separated by blanks (not by + commas). The information for each species follows, starting with a + ten-character species name (which can include punctuation marks and blanks), + and continuing with the characters for that species. + http://evolution.genetics.washington.edu/phylip/doc/main.html#inputfiles + Interleaved Example: + 6 39 + Archaeopt CGATGCTTAC CGCCGATGCT + HesperorniCGTTACTCGT TGTCGTTACT + BaluchitheTAATGTTAAT TGTTAATGTT + B. 
virginiTAATGTTCGT TGTTAATGTT + BrontosaurCAAAACCCAT CATCAAAACC + B.subtilisGGCAGCCAAT CACGGCAGCC + + TACCGCCGAT GCTTACCGC + CGTTGTCGTT ACTCGTTGT + AATTGTTAAT GTTAATTGT + CGTTGTTAAT GTTCGTTGT + CATCATCAAA ACCCATCAT + AATCACGGCA GCCAATCAC + """ + try: + fh = open( filename ) + # counts line + line = fh.readline().strip() + linePieces = line.split() + count = int(linePieces[0]) + seq_len = int(linePieces[1]) + # data lines + """ + TODO check data lines + while True: + line = fh.readline() + # name is the first 10 characters + name = line[0:10] + seq = line[10:].strip() + # nucleic base or amino acid 1-char designators (spaces allowed) + bases = ''.join(seq.split()) + # float per base (each separated by space) + """ + return True + except: + pass + finally: + fh.close() + return False + + +class Axes(Tabular): + file_ext = 'axes' + + def __init__(self, **kwd): + """Initialize axes datatype""" + Tabular.__init__( self, **kwd ) + def sniff( self, filename ): + """ + Determines whether the file is an axes format + The first line may have column headings. + The following lines have the name in the first column plus float columns for each axis.
+ ==> 98_sq_phylip_amazon.fn.unique.pca.axes <== + group axis1 axis2 + forest 0.000000 0.145743 + pasture 0.145743 0.000000 + + ==> 98_sq_phylip_amazon.nmds.axes <== + axis1 axis2 + U68589 0.262608 -0.077498 + U68590 0.027118 0.195197 + U68591 0.329854 0.014395 + """ + try: + fh = open( filename ) + count = 0 + line = fh.readline() + line = line.strip() + col_cnt = None + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + if line: + fields = line.split('\t') + if col_cnt == None: # ignore values in first line as they may be column headings + col_cnt = len(fields) + else: + if len(fields) != col_cnt : + return False + try: + for i in range(1, col_cnt): + check = float(fields[i]) + except ValueError: + return False + count += 1 + if count > 10: + return True + if count > 0: + return True + except: + pass + finally: + fh.close() + return False + +## Qiime Classes + +class QiimeMetadataMapping(Tabular): + MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] ) + file_ext = 'qiimemapping' + + def __init__(self, **kwd): + """ + http://qiime.sourceforge.net/documentation/file_formats.html#mapping-file-overview + Information about the samples necessary to perform the data analysis. 
+ # self.column_names = ['#SampleID','BarcodeSequence','LinkerPrimerSequence','Description'] + """ + Tabular.__init__( self, **kwd ) + + def sniff( self, filename ): + """ + Determines whether the file is a qiime mapping file + Just checking for an appropriate header line for now, could be improved + """ + try: + pat = '#SampleID(\t[a-zA-Z][a-zA-Z0-9_]*)*\tDescription' + fh = open( filename ) + for line in fh: + # iterate so the scan stops at EOF; the header may follow leading comment lines + if re.match(pat,line): + return True + except: + pass + finally: + fh.close() + return False + + def set_column_names(self, dataset): + if dataset.has_data(): + dataset_fh = open( dataset.file_name ) + line = dataset_fh.readline() + if line.startswith('#SampleID'): + dataset.metadata.column_names = line.strip().split('\t'); + dataset_fh.close() + + def set_meta( self, dataset, overwrite = True, skip = None, max_data_lines = None, **kwd ): + Tabular.set_meta(self, dataset, overwrite, skip, max_data_lines) + self.set_column_names(dataset) + +class QiimeOTU(Tabular): + """ + Associates OTUs with sequence IDs + Example: + 0 FLP3FBN01C2MYD FLP3FBN01B2ALM + 1 FLP3FBN01DF6NE FLP3FBN01CKW1J FLP3FBN01CHVM4 + 2 FLP3FBN01AXQ2Z + """ + file_ext = 'qiimeotu' + +class QiimeOTUTable(Tabular): + """ + #Full OTU Counts + #OTU ID PC.354 PC.355 PC.356 Consensus Lineage + 0 0 1 0 Root;Bacteria;Firmicutes;"Clostridia";Clostridiales + 1 1 3 1 Root;Bacteria + 2 0 2 2 Root;Bacteria;Bacteroidetes + """ + MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] ) + file_ext = 'qiimeotutable' + def init_meta( self, dataset, copy_from=None ): + Tabular.init_meta( self, dataset, copy_from=copy_from ) + def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): + self.set_column_names(dataset) + def set_column_names(self, dataset): + if dataset.has_data(): + dataset_fh = open( dataset.file_name ) + line = dataset_fh.readline() + line = dataset_fh.readline() + if line.startswith('#OTU 
ID'): + dataset.metadata.column_names = line.strip().split('\t'); + dataset_fh.close() + dataset.metadata.comment_lines = 2 + +class QiimeDistanceMatrix(Tabular): + """ + PC.354 PC.355 PC.356 + PC.354 0.0 3.177 1.955 + PC.355 3.177 0.0 3.444 + PC.356 1.955 3.444 0.0 + """ + file_ext = 'qiimedistmat' + def init_meta( self, dataset, copy_from=None ): + Tabular.init_meta( self, dataset, copy_from=copy_from ) + def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): + self.set_column_names(dataset) + def set_column_names(self, dataset): + if dataset.has_data(): + dataset_fh = open( dataset.file_name ) + line = dataset_fh.readline() + # first line contains the names + dataset.metadata.column_names = line.strip().split('\t'); + dataset_fh.close() + dataset.metadata.comment_lines = 1 + +class QiimePCA(Tabular): + """ + Principal Coordinate Analysis Data + The principal coordinate (PC) axes (columns) for each sample (rows). + Pairs of PCs can then be graphed to view the relationships between samples. + The bottom of the output file contains the eigenvalues and % variation explained for each PC. + Example: + pc vector number 1 2 3 + PC.354 -0.309063936588 0.0398252112257 0.0744672231759 + PC.355 -0.106593922619 0.141125998277 0.0780204374172 + PC.356 -0.219869362955 0.00917241121781 0.0357281314115 + + + eigvals 0.480220500471 0.163567082874 0.125594470811 + % variation explained 51.6955484555 17.6079322939 + """ + file_ext = 'qiimepca' + +class QiimeParams(Tabular): + """ +###pick_otus_through_otu_table.py parameters### + +# OTU picker parameters +pick_otus:otu_picking_method uclust +pick_otus:clustering_algorithm furthest + +# Representative set picker parameters +pick_rep_set:rep_set_picking_method first +pick_rep_set:sort_by otu + """ + file_ext = 'qiimeparams' + +class QiimePrefs(data.Text): + """ + A text file, containing coloring preferences to be used by make_distance_histograms.py, make_2d_plots.py and make_3d_plots.py. 
+ Example: +{ +'background_color':'black', + +'sample_coloring': + { + 'Treatment': + { + 'column':'Treatment', + 'colors':(('red',(0,100,100)),('blue',(240,100,100))) + }, + 'DOB': + { + 'column':'DOB', + 'colors':(('red',(0,100,100)),('blue',(240,100,100))) + } + }, +'MONTE_CARLO_GROUP_DISTANCES': + { + 'Treatment': 10, + 'DOB': 10 + } +} + """ + file_ext = 'qiimeprefs' + +class QiimeTaxaSummary(Tabular): + """ + Taxon PC.354 PC.355 PC.356 + Root;Bacteria;Actinobacteria 0.0 0.177 0.955 + Root;Bacteria;Firmicutes 0.177 0.0 0.444 + Root;Bacteria;Proteobacteria 0.955 0.444 0.0 + """ + MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] ) + file_ext = 'qiimetaxsummary' + + def set_column_names(self, dataset): + if dataset.has_data(): + dataset_fh = open( dataset.file_name ) + line = dataset_fh.readline() + if line.startswith('Taxon'): + dataset.metadata.column_names = line.strip().split('\t'); + dataset_fh.close() + + def set_meta( self, dataset, overwrite = True, skip = None, max_data_lines = None, **kwd ): + Tabular.set_meta(self, dataset, overwrite, skip, max_data_lines) + self.set_column_names(dataset) + +if __name__ == '__main__': + import doctest, sys + doctest.testmod(sys.modules[__name__]) + diff -r 2c9714f56480 -r d80000f5ad20 make_2d_plots.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_2d_plots.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,66 @@ + + Make 2D PCoA Plots + + make_2d_plots.py + + + qiime_wrapper.py + --galaxy_outputdir=$plot.extra_files_path + ##--galaxy_tmpdir='$__new_file_path__' + ##--galaxy_datasets='^\S+_2D_PCoA_plots\.html$:'$plot + --galaxy_datasets='^\S+\.html$:'$plot + ##--galaxy_datasetid=$output1.id + ##--galaxy_new_files_path='$__new_file_path__' + ##--galaxy_tmpdir='$__new_file_path__' + make_2d_plots.py + --coord_fname=$coord_fname + --map_fname=$map_fname + #if $colorby != None and $colorby.__str__ != 'None': + --colorby=$colorby + #end if + #if $prefs_path 
!= None and $prefs_path.__str__ != 'None' and len($prefs_path.__str__) > 0: + --prefs_path=$prefs_path + #end if + --background_color=$background_color + --ellipsoid_opacity=$ellipsoid_opacity + --ellipsoid_method=$ellipsoid_method + #if $master_pcoa != None and $master_pcoa.__str__ != 'None' and len($master_pcoa.__str__) > 0: + --master_pcoa=$master_pcoa + #end if + --output_dir=$plot.extra_files_path + + + + + + + + + + + + + + + + + + + + For more information, see make_2d_plots_ in the Qiime documentation. + +Updated and validated 01/18/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + + .. _make_2d_plots: http://qiime.org/scripts/make_2d_plots.html + + diff -r 2c9714f56480 -r d80000f5ad20 make_3d_plots.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_3d_plots.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,89 @@ + + Make 3D PCoA plots + + make_3d_plots.py + + + qiime_wrapper.py + --galaxy_summary_html='$output_html' + --galaxy_outputdir='$output_html.extra_files_path' + --galaxy_summary_template='$output_template' + make_3d_plots.py + --coord_fname=$coord_fname + --map_fname=$map_fname + --colorby=$colorby + --custom_axes=$custom_axes + --prefs_path=$prefs_path + --background_color=$background_color + --output_dir=$__new_file_path__ + --ellipsoid_smoothness=$ellipsoid_smoothness + --ellipsoid_opacity=$ellipsoid_opacity + --ellipsoid_method=$ellipsoid_method + --taxa_fname=$taxa_fname + --n_taxa_keep=$n_taxa_keep + --biplot_output_file=$biplot_output_file + --master_pcoa=$master_pcoa + --output_format=$output_format + --interpolation_points=$interpolation_points + --polyhedron_points=$polyhedron_points + --polyhedron_offset=$polyhedron_offset + + + + + + + + + + + + + + + + + + + + + + + + + + + +weighted_unifrac_pc.txt_3D.html + + +]]> + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 
make_bootstrapped_tree.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_bootstrapped_tree.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,29 @@ + + Make bootstrapped tree + + make_bootstrapped_tree.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + make_bootstrapped_tree.py + --master_tree=$master_tree + --support=$support + --output_file=$output_file + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 make_distance_histograms.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_distance_histograms.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,56 @@ + + Make distance histograms + + make_distance_histograms.py + + + qiime_wrapper.py + --galaxy_outputdir=$plot.extra_files_path + --galaxy_datasets='^\S+\.html$:'$plot + make_distance_histograms.py + --distance_matrix_file=$distance_matrix_file + --map_fname=$map_fname + #if $prefs_path != None and $prefs_path.__str__ != 'None' and len($prefs_path.__str__) > 0: + --prefs_path=$prefs_path + #end if + --dir_path=$plot.extra_files_path + --background_color=$background_color + $monte_carlo + #if $fields != None and $fields.__str__ != ' ' and $fields.__str__ !='': + --fields=$fields + #end if + --monte_carlo_iters=$monte_carlo_iters + + + + + + + + + + + + + + + + + + + For more information, see make_distance_histograms_ in the Qiime documentation. + +Updated and validated 01/18/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + + .. 
_make_distance_histograms: http://qiime.org/scripts/make_distance_histograms.html + + diff -r 2c9714f56480 -r d80000f5ad20 make_fastq.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_fastq.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,34 @@ + + Make fastq file for ERA submission from paired fasta and qual files + + make_fastq.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + make_fastq.py + --input_fasta_fp=$input_fasta_fp + --qual=$qual + --result_fp=$result_fp + $split + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 make_library_id_lists.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_library_id_lists.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,40 @@ + + Make library id lists + + make_library_id_lists.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + make_library_id_lists.py + --input_fasta=$input_fasta + --screened_rep_seqs=$screened_rep_seqs + --otus=$otus + --outdir=$outdir + --field=$field + $debug + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 make_otu_heatmap_html.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_otu_heatmap_html.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,60 @@ + + Make heatmap of OTU table + + make_otu_heatmap_html.py + + + qiime_wrapper.py + --galaxy_outputdir='$otu_heatmp.extra_files_path' + --galaxy_datasets='^\S+\.html$:'$otu_heatmp + make_otu_heatmap_html.py + --otu_table_fp=$otu_table_fp + --output_dir='$otu_heatmp.extra_files_path' + --num_otu_hits=$num_otu_hits + #if $tree != None and $tree.__str__ != 'None': + --tree=$tree + #end if + #if $map_fname != None and $map_fname.__str__ != 'None': + --map_fname=$map_fname + #end if + #if $sample_tree != None and $sample_tree.__str__ != 'None': + --sample_tree=$sample_tree + #end if + $log_transform + --log_eps=$log_eps + + + + + + + + + + + + + + + + + For more information, see make_otu_heatmap_html_ in the Qiime documentation. 
+ +Updated and validated 02/10/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + +.. _make_otu_heatmap_html: http://qiime.org/scripts/make_otu_heatmap_html.html + + diff -r 2c9714f56480 -r d80000f5ad20 make_otu_network.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_otu_network.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,38 @@ + + Make an OTU network and calculate statistics + + make_otu_network.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + make_otu_network.py + --mapping_file=$mapping_file + --input_file=$input_file + --colorby=$colorby + --prefs_path=$prefs_path + --background_color=$background_color + --output_dir=$__new_file_path__ + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 make_otu_table.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_otu_table.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,39 @@ + + Make OTU table + + make_otu_table.py + + + qiime_wrapper.py + make_otu_table.py + --otu_map_fp=$otu_map_fp + --output_fp=$output_fp + #if $taxonomy.__str__ != 'None': + --taxonomy=$taxonomy + #end if + #if $exclude_otus_fp.__str__ != 'None': + --exclude_otus_fp=$exclude_otus_fp + #end if + + + + + + + + + + + + For more information, see make_otu_table_ in the Qiime documentation. + +Updated and validated 01/16/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + +.. 
_make_otu_table: http://qiime.org/scripts/make_otu_table.html + + diff -r 2c9714f56480 -r d80000f5ad20 make_per_library_sff.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_per_library_sff.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,37 @@ + + Make per-library sff files from id lists + + make_per_library_sff.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + make_per_library_sff.py + --input_sff=$input_sff + --libdir=$libdir + --sfffile_path=$sfffile_path + $use_sfftools + $debug + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 make_phylogeny.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_phylogeny.pl Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,32 @@ +#!/usr/bin/perl +# (c) 2011 Amanda Zuzolo c.o. George Mason University + +# 08/22/2011 Created + +### give start time; define input and output ### +my $start = time(); + +my $inputfasta = $ARGV[0]; +my $tree_method = $ARGV[1]; +my $root_method = $ARGV[2]; +my $result_file = $ARGV[3]; + +## change directory; concat filenames to temp ## +chdir "\/tmp"; + +my $tempfasta = "temp_fasta_$userid$start.fasta"; +#my $tempoutput = "tempout_$userid$start.txt"; +#my $outputdir = "$userid$start"; +system("cat $inputfasta > $tempfasta"); + +##### make system call to run qiime script ##### + +system("make_phylogeny.py -i $tempfasta -t $tree_method -r $root_method"); + +## cat qiime tre file to output; delete temps ## + +system("cat temp_fasta_$userid$start.tre > $result_file"); +system("rm $tempfasta"); +system("rm temp_fasta_$userid$start.tre"); + +print $result_file; diff -r 2c9714f56480 -r d80000f5ad20 make_phylogeny.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_phylogeny.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,49 @@ + + Make Phylogeny + + make_phylogeny.py + + + qiime_wrapper.py + --galaxy_inputdir='$__new_file_path__' + --galaxy_ext_change='$input_fp' + --galaxy_new_ext='fasta' + make_phylogeny.py + --input_fp='$__new_file_path__'/temporary.fasta + 
--tree_method=$tree_method + --result_fp=$result_fp + --log_fp=$log_fp + --root_method=$root_method + + + + + + + + + + + + + + + + + + + + + + For more information, see make_phylogeny_ in the Qiime documentation. + +Updated and validated 01/16/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + +.. _make_phylogeny: http://qiime.org/scripts/make_phylogeny.html + + diff -r 2c9714f56480 -r d80000f5ad20 make_pie_charts.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_pie_charts.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,46 @@ + + Make pie charts based on taxonomy assignment + + make_pie_charts.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + make_pie_charts.py + --input_files=$input_files + --labels=$labels + $sample_flag + --num=$num + --dir-prefix=$dir_prefix + --colorby=$colorby + --prefs_path=$prefs_path + --background_color=$background_color + + + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 make_prefs_file.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_prefs_file.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,54 @@ + + Generate preferences file + + make_prefs_file.py + + + qiime_wrapper.py + make_prefs_file.py + --map_fname=$map_fname + --output_fp=$output_fp + #if $mapping_headers_to_use != None and $mapping_headers_to_use.__str__ != '': + --mapping_headers_to_use=$mapping_headers_to_use + #end if + --background_color=$background_color + --monte_carlo_dists=$monte_carlo_dists + #if $input_taxa_file != None and $input_taxa_file.__str__ != '' and $input_taxa_file.__str__ != 'None': + --input_taxa_file=$input_taxa_file + #end if + --ball_scale=$ball_scale + --arrow_line_color=$arrow_line_color + --arrow_head_color=$arrow_head_color + + + + + + + + + + + + + + + + + For more information, see make_prefs_file_ in the Qiime documentation. 
+ +Updated and validated 01/18/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + + .. _make_prefs_file: http://qiime.org/scripts/make_prefs_file.html + + diff -r 2c9714f56480 -r d80000f5ad20 make_qiime_py_file.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_qiime_py_file.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,38 @@ + + Create python file + + make_qiime_py_file.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + make_qiime_py_file.py + --output_fp=$output_fp + $script + $test + --author_name=$author_name + --author_email=$author_email + --copyright=$copyright + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 make_qiime_rst_file.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_qiime_rst_file.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,26 @@ + + Make Sphinx RST file + + make_qiime_rst_file.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + make_qiime_rst_file.py + --input_script=$input_script + --output_dir=$__new_file_path__ + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 make_rarefaction_plots.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_rarefaction_plots.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,62 @@ + + Generate Rarefaction Plots + + make_rarefaction_plots.py + + + qiime_wrapper.py + --galaxy_datasets='^\S+\.html$:'$plot + --galaxy_outputdir=$plot.extra_files_path + make_rarefaction_plots.py + --input_dir=$input_dir.extra_files_path + --map_fname=$map_fname + #if $colorby != None and $colorby.__str__ != 'None' and len($colorby.__str__) > 0: + --colorby=$colorby + #end if + #if $prefs_path != None and $prefs_path.__str__ != 'None': + --prefs_path=$prefs_path + #end if + #if $ymax != None and $ymax.__str__ != '': + --ymax=$ymax + #end if + --background_color=$background_color + 
--imagetype=$imagetype + --resolution=$resolution + --output_dir=$plot.extra_files_path + + + + + + + + + + + + + + + + + + + + + + + + This tool takes the log file output from collate_alpha to create an html file of rarefaction plots wherein you can plot by sample and/or by category. + +For more information, see make_rarefaction_plots_ in the Qiime documentation. + +Updated and validated 01/16/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + +.. _make_rarefaction_plots: http://qiime.org/scripts/make_rarefaction_plots.html + + diff -r 2c9714f56480 -r d80000f5ad20 mbac_align_seqs.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mbac_align_seqs.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,127 @@ + + Align sequences using a variety of alignment methods + + align_seqs.py + + + qiime_wrapper.py + --galaxy_outputdir='$log.extra_files_path' + --galaxy_datasets='^\S+_aligned\.\S+$:'$aligned_fasta,'^\S+_log\.txt$:'$log,'^\S+_failures\.fasta$:'$failures + align_seqs.py + --input_fasta_fp=$input_fasta_fp + --alignment_method=$align.alignment_method + #if $align.alignment_method == 'pynast': + --template_fp=$align.alignment.template_fp + --pairwise_alignment_method=$align.pairwise_alignment_method + --min_length=$align.min_length + --min_percent_id=$align.min_percent_id + --blast_db=$align.blast_db + #elif $align.alignment_method == 'infernal': + --template_fp=$align.alignment.template_fp + #elif $align.alignment_method == 'clustalw': + #echo '' + #elif $align.alignment_method == 'muscle': + #echo '' + #elif $align.alignment_method == 'mafft': + #echo '' + #end if + + --output_dir='$log.extra_files_path' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + align['alignment_method'] == 'pynast' + + + + + + + 
+ + diff -r 2c9714f56480 -r d80000f5ad20 mbac_alpha_diversity.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mbac_alpha_diversity.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,61 @@ + + Calculate alpha diversity on each sample in an otu table, using a variety of alpha diversity metrics + + alpha_diversity.py + + + qiime_wrapper.py + alpha_diversity.py + --input_path=$input_path + --output_path=$output_path + --metrics=$metrics + #if $tree_path.__str__ != 'None' and len($tree_path.__str__) != 0: + --tree_path=$tree_path + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 mbac_pick_otus.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mbac_pick_otus.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,161 @@ + + OTU picking [VALIDATED 8/18/2011] + + pick_otus.py + + + qiime_wrapper.py + --galaxy_outputdir='$log.extra_files_path' + #if $pick.otu_picking_method == 'uclust' and $pick.refseqs_fp.__str__ != 'None': + --galaxy_datasets='^\S+_otus\.txt$:'$otus,'^\S+_otus\.log$:'$log,'^\S+_failures\.txt$:'$failures + #else: + --galaxy_datasets='^\S+_otus\.txt$:'$otus,'^\S+_otus\.log$:'$log + #end if + pick_otus.py + --input_seqs_filepath=$input_seqs_filepath + #if $pick.otu_picking_method == 'uclust': + #if $pick.refseqs_fp.__str__ != 'None': + --refseqs_fp=$pick.refseqs_fp + --otu_picking_method='uclust_ref' + $pick.suppress_new_clusters + #else: + --otu_picking_method=$pick.otu_picking_method + #end if + --similarity=$pick.similarity + $pick.enable_rev_strand_match + $pick.optimal_uclust + $pick.exact_uclust + $pick.user_sort + $pick.suppress_presort_by_abundance_uclust + --max_accepts=$pick.max_accepts + --max_rejects=$pick.max_rejects + --uclust_otu_id_prefix=$pick.uclust_otu_id_prefix + $pick.uclust_stable_sort + $pick.save_uc_files + #elif $pick.otu_picking_method == 'mothur': + --otu_picking_method=$pick.otu_picking_method + --clustering_algorithm=$pick.clustering_algorithm + 
--similarity=$pick.similarity + #elif $pick.otu_picking_method == 'trie': + --otu_picking_method=$pick.otu_picking_method + $pick.trie_reverse_seqs + #elif $pick.otu_picking_method == 'prefix_suffix': + --otu_picking_method=$pick.otu_picking_method + --prefix_length=$pick.prefix_length + --suffix_length=$pick.suffix_length + #elif $pick.otu_picking_method == 'blast': + --otu_picking_method=$pick.otu_picking_method + #if $pick.refseqs_fp.__str__ != 'None': + --refseqs_fp=$pick.refseqs_fp + #end if + --blast_db=$pick.blast_db + --similarity=$pick.similarity + --max_e_value=$pick.max_e_value + --min_aligned_percent=$pick.min_aligned_percent + #elif $pick.otu_picking_method == 'cdhit': + --otu_picking_method=$pick.otu_picking_method + --similarity=$pick.similarity + --max_cdhit_memory=$pick.max_cdhit_memory + --prefix_prefilter_length=$pick.prefix_prefilter_length + $pick.trie_prefilter + #end if + --output_dir='$log.extra_files_path' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (pick['otu_picking_method'] == 'uclust' and pick['refseqs_fp']) + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 mbac_split_libraries.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mbac_split_libraries.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,132 @@ + + Split libraries according to barcodes specified in mapping file [VALIDATED 8/18/2011] + + split_libraries.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + --galaxy_datasets='^seqs.fna$:'$sequences,'histograms.txt:'$histograms,'split_library_log.txt:'$log + split_libraries.py + --map=$map + #set fnas = [] + #for i in $inputs: + #set fnas = $fnas + [$i.fasta.__str__] + #end for + --fasta=#echo ','.join($fnas) + #set quals = [] + #for i in $inputs: + #if $i.qual != None and $i.qual.__str__ != 'None': + #set quals = $quals + [$i.qual.__str__] + #end if + #end for + #if len($quals) > 0: + --qual=#echo ','.join($quals) + #end if + #if 
len($min_seq_length.__str__) > 0 and $min_seq_length > 0: + --min-seq-length=$min_seq_length + #end if + #if len($max_seq_length.__str__) > 0: + --max-seq-length=$max_seq_length + #end if + $trim_seq_length + #if len($min_qual_score.__str__) > 0: + --min-qual-score=$min_qual_score + #end if + $keep_primer + $keep_barcode + #if len($max_ambig.__str__) > 0: + --max-ambig=$max_ambig + #end if + #if len($max_homopolymer.__str__) > 0: + --max-homopolymer=$max_homopolymer + #end if + #if len($max_primer_mismatch.__str__) > 0: + --max-primer-mismatch=$max_primer_mismatch + #end if + --barcode-type=$barcode_type + ## --dir-prefix=$dir_prefix + #if $max_barcode_errors >= 0.: + --max-barcode-errors=$max_barcode_errors + #end if + #if len($start_numbering_at.__str__) > 0: + --start-numbering-at=$start_numbering_at + #end if + $remove_unassigned + $disable_bc_correction + #if len($qual_score_window.__str__) > 0: + --qual_score_window=$qual_score_window + #end if + $disable_primers + --reverse_primers=$reverse_primers + $record_qual_scores + $discard_bad_windows + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 merge_denoiser_output.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/merge_denoiser_output.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,35 @@ + + Merge the output of denoising step back into QIIME + + merge_denoiser_output.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + merge_denoiser_output.py + --map_file=$map_file + --otu_picker_map_file=$otu_picker_map_file + --fasta_fp=$fasta_fp + --denoised_fasta_fp=$denoised_fasta_fp + --output_dir=$__new_file_path__ + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 merge_mapping_files.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/merge_mapping_files.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,29 @@ + + Merge mapping files + + merge_mapping_files.py + + + qiime_wrapper.py + 
--galaxy_tmpdir='$__new_file_path__' + merge_mapping_files.py + --mapping_fps=$mapping_fps + --output_fp=$output_fp + --no_data_value=$no_data_value + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 merge_otu_maps.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/merge_otu_maps.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,29 @@ + + Merge OTU mapping files + + merge_otu_maps.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + merge_otu_maps.py + --otu_map_fps=$otu_map_fps + --output_fp=$output_fp + --failures_fp=$failures_fp + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 merge_otu_tables.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/merge_otu_tables.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,26 @@ + + + + merge_otu_tables.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + merge_otu_tables.py + --input_fps=$input_fps + --output_fp=$output_fp + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 multiple_rarefactions.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/multiple_rarefactions.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,51 @@ + + Perform multiple subsamplings/rarefactions on an otu table + + multiple_rarefactions.py + + + qiime_wrapper.py + --galaxy_logfile=$output1 + --galaxy_outputdir=$output1.extra_files_path + multiple_rarefactions.py + --input_path=$input_path + --output_path=$output1.extra_files_path + --min=$min + --max=$max + --step=$step + --num-reps=$num_reps + $lineages_included + $keep_empty_otus + + + + + + + + + + + + + + + + This tool rarefies OTU tables for use in jackknife, bootstrap, and rarefaction analyses. Samples with fewer sequences than the rarefaction depth requested for a given output otu table are omitted from those otu tables. The input is an OTU table (e.g., the output from make_otu_table). The output file is a log file listing all the rarefied otu tables produced. 
+ +For more information, see multiple_rarefactions_ in the Qiime documentation. + +Updated and validated 01/16/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + +.. _multiple_rarefactions: http://qiime.org/scripts/multiple_rarefactions.html + + diff -r 2c9714f56480 -r d80000f5ad20 multiple_rarefactions_even_depth.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/multiple_rarefactions_even_depth.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,38 @@ + + Perform multiple rarefactions on a single otu table, at one depth of sequences/sample + + multiple_rarefactions_even_depth.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + multiple_rarefactions_even_depth.py + --input_path=$input_path + --output_path=$output_path + --depth=$depth + --num-reps=$num_reps + $lineages_included + $keep_empty_otus + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 otu_category_significance.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/otu_category_significance.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,44 @@ + + OTU significance and co-occurence analysis + + otu_category_significance.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + otu_category_significance.py + --otu_table_fp=$otu_table_fp + --category_mapping_fp=$category_mapping_fp + --category=$category + --test=$test + --output_fp=$output_fp + --filter=$filter + --threshold=$threshold + --otu_include_fp=$otu_include_fp + + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 parallel_align_seqs_pynast.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/parallel_align_seqs_pynast.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,81 @@ + + Parallel sequence alignment using PyNAST + + parallel_align_seqs_pynast.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + 
parallel_align_seqs_pynast.py + --input_fasta_fp=$input_fasta_fp + --output_dir=$__new_file_path__ + --template_fp=$template_fp + --pairwise_alignment_method=$pairwise_alignment_method + --blast_db=$blast_db + --min_length=$min_length + --min_percent_id=$min_percent_id + --align_seqs_fp=$align_seqs_fp + --jobs_to_start=$jobs_to_start + --poller_fp=$poller_fp + $retain_temp_files + $suppress_submit_jobs + $poll_directly + --cluster_jobs_fp=$cluster_jobs_fp + $suppress_polling + --job_prefix=$job_prefix + --python_exe_fp=$python_exe_fp + --seconds_to_sleep=$seconds_to_sleep + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 parallel_alpha_diversity.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/parallel_alpha_diversity.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,65 @@ + + Parallel alpha diversity + + parallel_alpha_diversity.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + parallel_alpha_diversity.py + --input_path=$input_path + --output_path=$output_path + --metrics=$metrics + --tree_path=$tree_path + --alpha_diversity_fp=$alpha_diversity_fp + --poller_fp=$poller_fp + $retain_temp_files + $suppress_submit_jobs + $poll_directly + --cluster_jobs_fp=$cluster_jobs_fp + $suppress_polling + --job_prefix=$job_prefix + --python_exe_fp=$python_exe_fp + --seconds_to_sleep=$seconds_to_sleep + --jobs_to_start=$jobs_to_start + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 parallel_assign_taxonomy_blast.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/parallel_assign_taxonomy_blast.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,74 @@ + + Parallel taxonomy assignment using BLAST + + parallel_assign_taxonomy_blast.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + parallel_assign_taxonomy_blast.py + --input_fasta_fp=$input_fasta_fp + --id_to_taxonomy_fp=$id_to_taxonomy_fp + --output_dir=$__new_file_path__ + 
--reference_seqs_fp=$reference_seqs_fp + --blast_db=$blast_db + --e_value=$e_value + --blastmat_dir=$blastmat_dir + --assign_taxonomy_fp=$assign_taxonomy_fp + --jobs_to_start=$jobs_to_start + --poller_fp=$poller_fp + $retain_temp_files + $suppress_submit_jobs + $poll_directly + --cluster_jobs_fp=$cluster_jobs_fp + $suppress_polling + --job_prefix=$job_prefix + --python_exe_fp=$python_exe_fp + --seconds_to_sleep=$seconds_to_sleep + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 parallel_assign_taxonomy_rdp.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/parallel_assign_taxonomy_rdp.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,65 @@ + + Parallel taxonomy assignment using RDP + + parallel_assign_taxonomy_rdp.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + parallel_assign_taxonomy_rdp.py + --input_fasta_fp=$input_fasta_fp + --output_dir=$__new_file_path__ + --rdp_classifier_fp=$rdp_classifier_fp + --confidence=$confidence + --assign_taxonomy_fp=$assign_taxonomy_fp + --jobs_to_start=$jobs_to_start + --poller_fp=$poller_fp + $retain_temp_files + $suppress_submit_jobs + $poll_directly + --cluster_jobs_fp=$cluster_jobs_fp + $suppress_polling + --job_prefix=$job_prefix + --python_exe_fp=$python_exe_fp + --seconds_to_sleep=$seconds_to_sleep + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 parallel_beta_diversity.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/parallel_beta_diversity.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,68 @@ + + Parallel beta diversity + + parallel_beta_diversity.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + parallel_beta_diversity.py + --input_path=$input_path + --output_path=$output_path + --metrics=$metrics + --tree_path=$tree_path + --beta_diversity_fp=$beta_diversity_fp + --poller_fp=$poller_fp + $retain_temp_files + $suppress_submit_jobs + $poll_directly + --cluster_jobs_fp=$cluster_jobs_fp + 
$suppress_polling + --job_prefix=$job_prefix + --python_exe_fp=$python_exe_fp + --seconds_to_sleep=$seconds_to_sleep + --jobs_to_start=$jobs_to_start + $full_tree + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 parallel_blast.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/parallel_blast.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,80 @@ + + Parallel BLAST + + parallel_blast.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + parallel_blast.py + --infile_path=$infile_path + --refseqs_path=$refseqs_path + --output_dir=$__new_file_path__ + $disable_low_complexity_filter + --e_value=$e_value + --num_hits=$num_hits + --word_size=$word_size + $suppress_format_blastdb + --blastmat_dir=$blastmat_dir + --blastall_fp=$blastall_fp + --jobs_to_start=$jobs_to_start + --poller_fp=$poller_fp + $retain_temp_files + $suppress_submit_jobs + $poll_directly + --cluster_jobs_fp=$cluster_jobs_fp + $suppress_polling + --job_prefix=$job_prefix + --python_exe_fp=$python_exe_fp + --seconds_to_sleep=$seconds_to_sleep + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 parallel_identify_chimeric_seqs.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/parallel_identify_chimeric_seqs.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,89 @@ + + Parallel chimera detection + + parallel_identify_chimeric_seqs.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + parallel_identify_chimeric_seqs.py + --input_fasta_fp=$input_fasta_fp + --aligned_reference_seqs_fp=$aligned_reference_seqs_fp + --id_to_taxonomy_fp=$id_to_taxonomy_fp + --reference_seqs_fp=$reference_seqs_fp + --blast_db=$blast_db + --chimera_detection_method=$chimera_detection_method + --num_fragments=$num_fragments + --taxonomy_depth=$taxonomy_depth + --max_e_value=$max_e_value + --min_div_ratio=$min_div_ratio + --output_fp=$output_fp + --identify_chimeric_seqs_fp=$identify_chimeric_seqs_fp + 
--jobs_to_start=$jobs_to_start + --poller_fp=$poller_fp + $retain_temp_files + $suppress_submit_jobs + $poll_directly + --cluster_jobs_fp=$cluster_jobs_fp + $suppress_polling + --job_prefix=$job_prefix + --python_exe_fp=$python_exe_fp + --seconds_to_sleep=$seconds_to_sleep + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 parallel_multiple_rarefactions.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/parallel_multiple_rarefactions.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,74 @@ + + Parallel multiple file rarefaction + + parallel_multiple_rarefactions.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + parallel_multiple_rarefactions.py + --input_path=$input_path + --output_path=$output_path + --min=$min + --max=$max + --step=$step + --num-reps=$num_reps + $lineages_included + --single_rarefaction_fp=$single_rarefaction_fp + --poller_fp=$poller_fp + $retain_temp_files + $suppress_submit_jobs + $poll_directly + --cluster_jobs_fp=$cluster_jobs_fp + $suppress_polling + --job_prefix=$job_prefix + --python_exe_fp=$python_exe_fp + --seconds_to_sleep=$seconds_to_sleep + --jobs_to_start=$jobs_to_start + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 parallel_pick_otus_blast.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/parallel_pick_otus_blast.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,74 @@ + + Parallel pick otus using BLAST + + parallel_pick_otus_blast.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + parallel_pick_otus_blast.py + --input_fasta_fp=$input_fasta_fp + --output_dir=$__new_file_path__ + --max_e_value=$max_e_value + --similarity=$similarity + --refseqs_fp=$refseqs_fp + --blast_db=$blast_db + --min_aligned_percent=$min_aligned_percent + --pick_otus_fp=$pick_otus_fp + --jobs_to_start=$jobs_to_start + --poller_fp=$poller_fp + $retain_temp_files + $suppress_submit_jobs + $poll_directly + 
--cluster_jobs_fp=$cluster_jobs_fp + $suppress_polling + --job_prefix=$job_prefix + --python_exe_fp=$python_exe_fp + --seconds_to_sleep=$seconds_to_sleep + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 parallel_pick_otus_uclust_ref.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/parallel_pick_otus_uclust_ref.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,95 @@ + + Parallel pick otus using uclust_ref + + parallel_pick_otus_uclust_ref.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + parallel_pick_otus_uclust_ref.py + --input_fasta_fp=$input_fasta_fp + --output_dir=$__new_file_path__ + --refseqs_fp=$refseqs_fp + --similarity=$similarity + $enable_rev_strand_match + $optimal_uclust + $exact_uclust + --max_accepts=$max_accepts + --max_rejects=$max_rejects + --stepwords=$stepwords + --word_length=$word_length + $uclust_stable_sort + $suppress_uclust_stable_sort + $save_uc_files + --pick_otus_fp=$pick_otus_fp + --jobs_to_start=$jobs_to_start + --poller_fp=$poller_fp + $retain_temp_files + $suppress_submit_jobs + $poll_directly + --cluster_jobs_fp=$cluster_jobs_fp + $suppress_polling + --job_prefix=$job_prefix + --python_exe_fp=$python_exe_fp + --seconds_to_sleep=$seconds_to_sleep + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 per_library_stats.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/per_library_stats.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,35 @@ + + Calculate per library statistics + + per_library_stats.py + + + qiime_wrapper.py + per_library_stats.py + --otu_table_fp=$otu_table_fp + #if $mapfile != None and $mapfile.__str__ != 'None' and $mapfile.__str__ != '': + --mapfile=$mapfile + #end if + --outputfile=$outputfile + + + + + + + + + + + .. class:: warningmark Warning: log data from standard output currently not available. + +For more information, see per_library_stats_ in the Qiime documentation. 
+ +Updated and validated 01/18/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + + .. _per_library_stats: http://qiime.org/scripts/per_library_stats.html + + diff -r 2c9714f56480 -r d80000f5ad20 pick_otus.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pick_otus.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,171 @@ + + OTU picking + + pick_otus.py + + + qiime_wrapper.py + --galaxy_outputdir='$log.extra_files_path' + #if $pick.otu_picking_method == 'uclust' and $pick.refseqs_fp.__str__ != 'None': + --galaxy_datasets='^\S+_otus\.txt$:'$otus,'^\S+_otus\.log$:'$log,'^\S+_failures\.txt$:'$failures + #else: + --galaxy_datasets='^\S+_otus\.txt$:'$otus,'^\S+_otus\.log$:'$log + #end if + pick_otus.py + --input_seqs_filepath=$input_seqs_filepath + #if $pick.otu_picking_method.__str__ == 'uclust': + #if $pick.refseqs_fp.__str__ != 'None': + --refseqs_fp=$pick.refseqs_fp + --otu_picking_method='uclust_ref' + $pick.suppress_new_clusters + #else: + --otu_picking_method=$pick.otu_picking_method + #end if + --similarity=$pick.similarity + $pick.enable_rev_strand_match + $pick.optimal_uclust + $pick.exact_uclust + $pick.user_sort + $pick.suppress_presort_by_abundance_uclust + --max_accepts=$pick.max_accepts + --max_rejects=$pick.max_rejects + #if $pick.uclust_otu_id_prefix != None and $pick.uclust_otu_id_prefix.__str__ != 'None' and $pick.uclust_otu_id_prefix.__str__ != '': + --uclust_otu_id_prefix=$pick.uclust_otu_id_prefix + #end if + $pick.uclust_stable_sort + $pick.save_uc_files + #elif $pick.otu_picking_method.__str__ == 'mothur': + --otu_picking_method=$pick.otu_picking_method + --clustering_algorithm=$pick.clustering_algorithm + --similarity=$pick.similarity + #elif $pick.otu_picking_method.__str__ == 'trie': + --otu_picking_method=$pick.otu_picking_method + $pick.trie_reverse_seqs + #elif 
$pick.otu_picking_method.__str__ == 'prefix_suffix': + --otu_picking_method=$pick.otu_picking_method + --prefix_length=$pick.prefix_length + --suffix_length=$pick.suffix_length + #elif $pick.otu_picking_method.__str__ == 'blast': + --otu_picking_method=$pick.otu_picking_method + #if $pick.refseqs_fp.__str__ != 'None': + --refseqs_fp=$pick.refseqs_fp + #end if + #if $pick.blast_db != None and $pick.blast_db.__str__ != 'None' and $pick.blast_db.__str__ != '': + --blast_db=$pick.blast_db + #end if + --similarity=$pick.similarity + --max_e_value=$pick.max_e_value + --min_aligned_percent=$pick.min_aligned_percent + #elif $pick.otu_picking_method == 'cdhit': + --otu_picking_method=$pick.otu_picking_method + --similarity=$pick.similarity + --max_cdhit_memory=$pick.max_cdhit_memory + #if $pick.prefix_prefilter_length != 0: + --prefix_prefilter_length=$pick.prefix_prefilter_length + #end if + $pick.trie_prefilter + #end if + --output_dir='$log.extra_files_path' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (pick['otu_picking_method'] == 'uclust' and pick['refseqs_fp']) + + + + + For more information, see pick_otus_ in the Qiime documentation. + +Updated and validated 01/16/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + +.. 
_pick_otus: http://qiime.org/scripts/pick_otus.html + + diff -r 2c9714f56480 -r d80000f5ad20 pick_otus_through_otu_table.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pick_otus_through_otu_table.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,203 @@ + + A workflow script for picking OTUs through building OTU tables + + pick_otus_through_otu_table.py + + + qiime_wrapper.py + --galaxy_outputdir='$log.extra_files_path' + --galaxy_datasets='^log_\S+\.txt$:'$log,'^\S+_otus.txt$:'$seqs_otus,'^\S+_rep_set.fasta$:'$seqs_rep_set,'^\S+_rep_set_tax_assignments.txt$:'$seqs_rep_set_tax_assignments,'^\S+_otu_table.txt$:'$seqs_otu_table,'^\S+_rep_set_aligned.fasta$:'$seqs_rep_set_aligned,'^\S+_rep_set_aligned_pfiltered.fasta$:'$seqs_rep_set_aligned_pfiltered,'^\S+_rep_set.tre$:'$seqs_rep_set_tre + pick_otus_through_otu_table.py + --input_fp=$input_fp + #if $parameter.source == 'hist': + --parameter_fp=$parameter.parameter_fp + #else: + --parameter_fp=$parameter_generated + #end if + #if $denoise.choice == 'yes': + --sff_fp=$denoise.sff_fp + --mapping_fp=$denoise.mapping_fp + #end if + --output_dir='$log.extra_files_path' + --force + $parallel + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +##OTU picker parameters +pick_otus:otu_picking_method uclust +pick_otus:clustering_algorithm furthest +pick_otus:max_cdhit_memory 400 +pick_otus:refseqs_fp +pick_otus:blast_db +pick_otus:similarity 0.97 +pick_otus:max_e_value 1e-10 +pick_otus:prefix_prefilter_length +pick_otus:trie_prefilter +pick_otus:prefix_length +pick_otus:suffix_length +pick_otus:optimal_uclust +pick_otus:exact_uclust +pick_otus:user_sort +pick_otus:suppress_presort_by_abundance_uclust +pick_otus:suppress_new_clusters +pick_otus:suppress_uclust_stable_sort +pick_otus:max_accepts +pick_otus:max_rejects +pick_otus:word_length +pick_otus:stepwords +##Representative set picker parameters +pick_rep_set:rep_set_picking_method first +pick_rep_set:sort_by otu +##Multiple sequence alignment parameters 
+align_seqs:template_fp +align_seqs:alignment_method pynast +align_seqs:pairwise_alignment_method uclust +align_seqs:blast_db +align_seqs:min_length 150 +align_seqs:min_percent_id 75.0 +##Alignment filtering (prior to tree-building) parameters +filter_alignment:lane_mask_fp +filter_alignment:allowed_gap_frac 0.999999 +filter_alignment:remove_outliers False +filter_alignment:threshold 3.0 +##Taxonomy assignment parameters +assign_taxonomy:id_to_taxonomy_fp +assign_taxonomy:reference_seqs_fp +assign_taxonomy:assignment_method rdp +assign_taxonomy:blast_db +assign_taxonomy:confidence 0.8 +#assign_taxonomy:e_value 0.001 +##Phylogenetic tree building parameters +make_phylogeny:tree_method fasttree +make_phylogeny:root_method tree_method_default +##align_seqs:template_fp +##filter_alignment:lane_mask_fp + + + + + + + + + + + + + + + + + + + + The steps performed by this function are: + 0) Optionally denoise the sequences (if sff_input_fp=True); + 1) Pick OTUs; + 2) Pick a representative set; + 3) Align the representative set; + 4) Assign taxonomy; + 5) Filter the alignment prior to tree building - remove positions + which are all gaps, and specified as 0 in the lanemask + 6) Build a phylogenetic tree; + 7) Build an OTU table. 
+ + +pick_otus_through_otu_table.py -i split_library_output/seqs.fna -p custom_parameters.txt -o wf_da --print_only + +python /usr/local/bin/pick_otus.py -i split_library_output/seqs.fna -o wf_da/uclust_picked_otus --max_e_value 1e-10 --clustering_algorithm furthest --similarity 0.97 --otu_picking_method uclust --max_cdhit_memory 400 + +python /usr/local/bin/pick_rep_set.py -i wf_da/uclust_picked_otus/seqs_otus.txt -f split_library_output/seqs.fna -l wf_da/uclust_picked_otus/rep_set//seqs_rep_set.log -o wf_da/uclust_picked_otus/rep_set//seqs_rep_set.fasta --rep_set_picking_method first --sort_by otu + +python /usr/local/bin/assign_taxonomy.py -o wf_da/uclust_picked_otus/rep_set//rdp_assigned_taxonomy -i wf_da/uclust_picked_otus/rep_set//seqs_rep_set.fasta --confidence 0.8 --assignment_method rdp + +python /usr/local/bin/make_otu_table.py -i wf_da/uclust_picked_otus/seqs_otus.txt -t wf_da/uclust_picked_otus/rep_set//rdp_assigned_taxonomy/seqs_rep_set_tax_assignments.txt -o wf_da/uclust_picked_otus/rep_set//rdp_assigned_taxonomy/otu_table//seqs_otu_table.txt + +python /usr/local/bin/align_seqs.py -i wf_da/uclust_picked_otus/rep_set//seqs_rep_set.fasta -o wf_da/uclust_picked_otus/rep_set//pynast_aligned_seqs --alignment_method pynast --pairwise_alignment_method uclust --min_percent_id 75.0 --min_length 150 + +python /usr/local/bin/filter_alignment.py -o wf_da/uclust_picked_otus/rep_set//pynast_aligned_seqs -i wf_da/uclust_picked_otus/rep_set//pynast_aligned_seqs/seqs_rep_set_aligned.fasta --allowed_gap_frac 0.999999 --threshold 3.0 + +python /usr/local/bin/make_phylogeny.py -i wf_da/uclust_picked_otus/rep_set//pynast_aligned_seqs/seqs_rep_set_aligned_pfiltered.fasta -o wf_da/uclust_picked_otus/rep_set//pynast_aligned_seqs/fasttree_phylogeny/seqs_rep_set.tre -l wf_da/uclust_picked_otus/rep_set//pynast_aligned_seqs/fasttree_phylogeny/seqs_rep_set_phylogeny.log --root_method tree_method_default --tree_method fasttree + + + + + diff -r 2c9714f56480 -r d80000f5ad20 
pick_reference_otus_through_otu_table.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pick_reference_otus_through_otu_table.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,94 @@ + + Reference OTU picking/Shotgun UniFrac workflow. + + pick_reference_otus_through_otu_table.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + pick_reference_otus_through_otu_table.py + --input_fp=$input_fp + --reference_fp=$reference_fp + --output_dir=$__new_file_path__ + #if $parameter.source == 'hist': + --parameter_fp=$parameter_fp + #else: + --parameter_fp=$parameter_generated + #end if + --taxonomy_fp=$taxonomy_fp + $force + $print_only + $parallel + + + + + + + + + + + + + + + + + + + + + + + + + +alpha_diversity:metrics chao1,observed_species,PD_whole_tree +multiple_rarefactions_even_depth:num-reps 20 +parallel:jobs_to_start 2 +parallel:retain_temp_files False +parallel:seconds_to_sleep 60 +collate_alpha:example_path +make_rarefaction_plots:imagetype png +make_rarefaction_plots:resolution 75 +make_rarefaction_plots:background_color white +make_rarefaction_plots:prefs_path + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 pick_rep_set.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pick_rep_set.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,53 @@ + + Pick representative set of sequences + + pick_rep_set.py + + + qiime_wrapper.py + pick_rep_set.py + --input_file=$input_file + #if $reference_seqs_fp.__str__ != 'None' or $reference_seqs_fp != None and $reference_seqs_fp.__str__ == '': + --reference_seqs_fp=$reference_seqs_fp + #else: + --fasta_file=$fasta_file + #end if + --rep_set_picking_method=$rep_set_picking_method + --sort_by=$sort_by + --log_fp=$log_fp + --result_fp=$result_fp + + + + + + + + + + + + + + + + + + + + + + + For more information, see pick_rep_set_ in the Qiime documentation. 
+ +Updated and validated 01/16/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + +.. _pick_rep_set: http://qiime.org/scripts/pick_rep_set.html + + diff -r 2c9714f56480 -r d80000f5ad20 plot_rank_abundance_graph.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/plot_rank_abundance_graph.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,44 @@ + + plot rank-abundance curve + + plot_rank_abundance_graph.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + plot_rank_abundance_graph.py + --otu_table_fp=$otu_table_fp + --sample_name=$sample_name + --output_dir=$__new_file_path__ + $absolute_counts + $no_legend + $x_linear_scale + $y_linear_scale + --file_type=$file_type + + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 plot_taxa_summary.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/plot_taxa_summary.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,99 @@ + + Make taxonomy summary charts based on taxonomy assignment + + plot_taxa_summary.py + + + qiime_wrapper.py + --galaxy_outputdir='$taxonomy_summary_chart.extra_files_path' + --galaxy_datasets='^\S+_charts\.html$:'$taxonomy_summary_chart + plot_taxa_summary.py + #set $counts = [] + #for i in $inputs: + #set $counts = $counts + [$i.counts_fname.__str__] + #end for + --counts_fname=#echo ','.join($counts) + #if $labels != None and $labels.__str__ != 'None' and $labels.__str__ != '': + --labels=$labels + #end if + --num_categories=$num_categories + #if $colorby != None and $colorby.__str__ != 'None' and $colorby.__str__ != '': + --colorby=$colorby + #end if + #if $prefs_path != None and $prefs_path.__str__ != 'None': + --prefs_path=$prefs_path + #end if + --background_color=$background_color + --dpi=$dpi + --x_width=$x_width + --y_height=$y_height + --bar_width=$bar_width + --type_of_file=$type_of_file + 
--chart_type=$chart_type + --resize_nth_label=$resize_nth_label + $include_html_legend + $include_html_counts + --label_type=$label_type + --dir_path='$taxonomy_summary_chart.extra_files_path' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For more information, see plot_taxa_summary_ in the Qiime documentation. + +Updated and validated 01/20/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + + .. _plot_taxa_summary: http://qiime.org/scripts/plot_taxa_summary.html + + diff -r 2c9714f56480 -r d80000f5ad20 poller.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/poller.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,43 @@ + + Poller for parallel QIIME scripts. + + poller.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + poller.py + --check_run_complete_file=$check_run_complete_file + --check_run_complete_f=$check_run_complete_f + --process_run_results_f=$process_run_results_f + --process_run_results_file=$process_run_results_file + --clean_up_f=$clean_up_f + --clean_up_file=$clean_up_file + --time_to_sleep=$time_to_sleep + + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 poller_example.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/poller_example.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,34 @@ + + Create python file + + poller_example.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + poller_example.py + --polled_dir=$polled_dir + --poller_fp=$poller_fp + --python_exe_fp=$python_exe_fp + $suppress_custom_functions + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 pool_by_metadata.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pool_by_metadata.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,37 @@ + + pool samples in OTU table and mapping file based on sample metadata from mapping 
file + + pool_by_metadata.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + pool_by_metadata.py + --otu_table_fp=$otu_table_fp + --map=$map + --states=$states + --otu_outfile=$otu_outfile + --map_outfile=$map_outfile + --pooled_sample_name=$pooled_sample_name + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 principal_coordinates.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/principal_coordinates.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,40 @@ + + Principal Coordinates Analysis (PCoA) + + principal_coordinates.py + + + qiime_wrapper.py + principal_coordinates.py + --input_path=$input_path + --output_path=$output_path + + + + + + + + + + + + + + + + + + + + + For more information, see principal_coordinates_ in the Qiime documentation. + +Updated and validated 01/18/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + + .. _principal_coordinates: http://qiime.org/scripts/principal_coordinates.html + + diff -r 2c9714f56480 -r d80000f5ad20 print_qiime_config.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/print_qiime_config.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,23 @@ + + Print out the qiime config settings. 
+ + print_qiime_config.py + + + print_qiime_config.py + $test + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 process_sff.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/process_sff.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,35 @@ + + Convert sff to FASTA and QUAL files + + process_sff.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + process_sff.py + --input_dir=$input_dir + $make_flowgram + $convert_to_FLX + $use_sfftools + --output_dir=$__new_file_path__ + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 qiime/README --- a/qiime/README Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,129 +0,0 @@ -This was a first attempt at providing galaxy tool_wrappers for the Qiime metagenomics package: -You must first istall Qiime: http://qiime.sourceforge.net/install/install.html - - - -Initial tool wrappers were generated by a script searching the qiime scripts (version 1.2.1) for usage info, -and then were hand edited afterwards. - -NOTE: A few of the tool configs worked on the galaxy-central code in April 2011. -I haven't taken time to check them with more recent galaxy releases. - - -I executed the qiime scripts via qiime_wrapper.py -This was to accommmodate moving multiple outputs to history items: http://wiki.g2.bx.psu.edu/Admin/Tools/Multiple%20Output%20Files - - -The datatypes file: metagenomics.py has Mothur datatypes with a start at qiime types added at the end. - - - - -The most common used qiime scripts are: -- check_id_map.py -- split_libraries.py -- pick_otus_through_otu_table.py -- beta_diversity_through_3d_plots.py -- alpha_rarefaction.py -- jackknifed_beta_diversity.py -- filter_by_metadata.py -- filter_otu_table.py -- merge_otu_tables.py -- merge_mapping_files.py - - -Tool_config development status: -The tool configs with a * indicate that the tool at least displayed in galaxy at least once upon time. 
-( Since these were intially auto generated, some may not make sense in a galaxy framework. ) - - add_taxa.xml - adjust_seq_orientation.xml -* align_seqs.xml -* alpha_diversity.xml metrics - select input/output repeat conditional tree -* alpha_rarefaction.xml -* assign_taxonomy.xmlA assignment_method-select -* beta_diversity.xml -* beta_diversity_through_3d_plots.xml html-plots - beta_significance.xml - blast_wrapper.xml -* check_id_map.xml - collate_alpha.xml -* compare_3d_plots.xml - consensus_tree.xml - convert_otu_table_to_unifrac_sample_mapping.xml - convert_unifrac_sample_mapping_to_otu_table.xml -* denoise.xml -* dissimilarity_mtx_stats.xml - exclude_seqs_by_blast.xml - extract_seqs_by_sample_id.xml -* filter_alignment.xml - filter_by_metadata.xml - filter_fasta.xml - filter_otu_table.xml -* filter_otus_by_sample.xml - fix_arb_fasta.xml - identify_chimeric_seqs.xml -* jackknifed_beta_diversity.xml -* make_2d_plots.xml -* make_3d_plots.xml - make_bootstrapped_tree.xml - make_distance_histograms.xml - make_fastq.xml - make_library_id_lists.xml -* make_otu_heatmap_html.xml -* make_otu_network.xml - make_otu_table.xml - make_per_library_sff.xml - make_phylogeny.xml - make_pie_charts.xml - make_prefs_file.xml - make_qiime_py_file.xml -* make_qiime_rst_file.xml -* make_rarefaction_plots.xml -* make_sra_submission.xml -* merge_denoiser_output.xml - merge_mapping_files.xml - merge_otu_maps.xml - merge_otu_tables.xml - multiple_rarefactions.xml - multiple_rarefactions_even_depth.xml - otu_category_significance.xml -* parallel_align_seqs_pynast.xml - parallel_alpha_diversity.xml -* parallel_assign_taxonomy_blast.xml -* parallel_assign_taxonomy_rdp.xml - parallel_beta_diversity.xml -* parallel_blast.xml - parallel_identify_chimeric_seqs.xml - parallel_multiple_rarefactions.xml -* parallel_pick_otus_blast.xml -* parallel_pick_otus_uclust_ref.xml - per_library_stats.xml -* pick_otus.xml -* pick_otus_through_otu_table.xml - pick_rep_set.xml -* 
plot_rank_abundance_graph.xml - poller.xml - poller_example.xml - pool_by_metadata.xml - principal_coordinates.xml - print_qiime_config.xml -* process_sff.xml -* process_sra_submission.xml -* quality_scores_plot.xml - shared_phylotypes.xml - single_rarefaction.xml - sort_denoiser_output.xml -* split_libraries.xml -* split_libraries_illumina.xml - sra_spreadsheet_to_map_files.xml - start_parallel_jobs.xml - summarize_otu_by_cat.xml - summarize_taxa.xml -* supervised_learning.xml -* transform_coordinate_matrices.xml -* tree_compare.xml - trflp_file_to_otu_table.xml - trim_sff_primers.xml -* truncate_fasta_qual_files.xml - upgma_cluster.xml diff -r 2c9714f56480 -r d80000f5ad20 qiime/align_seqs.xml --- a/qiime/align_seqs.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,91 +0,0 @@ - - Align sequences using a variety of alignment methods - - align_seqs.py - - - qiime_wrapper.py - --galaxy_outputdir='$log.extra_files_path' - --galaxy_datasets='^\S+_aligned\.\S+$:'$aligned_fasta,'^\S+_log\.txt$:'$log,'^\S+_failures\.fasta$:'$failures - align_seqs.py - --input_fasta_fp=$input_fasta_fp - --alignment_method=$alignment_method - #if $alignment_method.__str__ == 'pynast': - #if $alignment.template_fp != None and $alignment.template_fp.__str__ != 'None' and $alignment.template_fp.__str__ != '': - --template_fp=$alignment.template_fp - #end if - --pairwise_alignment_method=$pairwise_alignment_method - --min_length=$min_length - --min_percent_id=$min_percent_id - #if $blast_db != None and $blast_db.__str__ != 'None' and $blast_db.__str__ != '': - --blast_db=$blast_db - #end if - #elif $alignment_method.__str__ == 'infernal': - --template_fp=$alignment.template_fp - #end if - - --output_dir='$log.extra_files_path' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - .. class:: warningmark -Note: MUSCLE alignment is still not verified. Use at your own risk. 
- -For more information, see align_seqs_ in the Qiime documentation. - -Updated and validated 01/16/12 - -.. _align_seqs: http://qiime.org/scripts/align_seqs.html - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/alpha_diversity.xml --- a/qiime/alpha_diversity.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,79 +0,0 @@ - - Calculate alpha diversity on each sample in an otu table, using a variety of alpha diversity metrics - - alpha_diversity.py - - - qiime_wrapper.py - #if $run_type.input_type.__str__ == "multi": - --galaxy_logfile=$output_path - --galaxy_outputdir=$output_path.extra_files_path - #end if - alpha_diversity.py - #if $run_type.input_type.__str__ == "multi": - --input_path=$input_path.extra_files_path - --output_path=$output_path.extra_files_path - #else: - --output_path=$output_path - --input_path=$input_path - #end if - --metrics=$metrics - #if $metrics.__str__ == 'PD_whole_tree': - --tree_path=$tree_path - #end if - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - This tool calculates alpha diversity, or within-sample diversity, using an otu table. Metrics may be selected in any combination. Input can be the log file from multiple_rarefactions (batch alpha diversity), or a single rarefied OTU table (single_rarefaction/single file alpha diversity). When the phylogenetic metric PD_whole_tree is selected, a .tre file must be supplied for the tool to run. The output file is a log file listing all the alpha rarefaction files produced. 
- - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/alpha_rarefaction.xml --- a/qiime/alpha_rarefaction.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,108 +0,0 @@ - - A workflow script for performing alpha rarefaction - - alpha_rarefaction.py - - - qiime_wrapper.py - --galaxy_summary_html='$output_html' - --galaxy_outputdir='$output_html.extra_files_path' - --galaxy_summary_template='$output_template' - ## --galaxy_datasets='^rarefaction_plots.html$:'$output_html - alpha_rarefaction.py - --otu_table_fp=$otu_table_fp - --mapping_fp=$mapping_fp - --output_dir=$output_html.extra_files_path - #if $parameter.source.__str__ == 'hist': - --parameter_fp=$parameter_fp - #else: - --parameter_fp=$parameter_generated - #end if - --num_steps=$num_steps - $force - $print_only - $parallel - #if $tree_fp != None and $tree_fp.__str__ != 'None': - --tree_fp=$tree_fp - #end if - - - - - - - - - - - - - - - - - - - - - - - - -alpha_diversity:metrics chao1,observed_species,PD_whole_tree -multiple_rarefactions_even_depth:num-reps 20 -parallel:jobs_to_start 2 -parallel:retain_temp_files False -parallel:seconds_to_sleep 60 -collate_alpha:example_path -make_rarefaction_plots:imagetype png -make_rarefaction_plots:resolution 75 -make_rarefaction_plots:background_color white -make_rarefaction_plots:prefs_path - - - - -rarefaction_plots.html - - -]]> - - - - - - - - - - - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/assign_taxonomy.xml --- a/qiime/assign_taxonomy.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,51 +0,0 @@ - - Assign taxonomy to each sequence - - assign_taxonomy.py - - - qiime_wrapper.py - --galaxy_outputdir='$outputfile.extra_files_path' - --galaxy_datasets='^\S+\.txt$:'$outputfile - assign_taxonomy.py - --input_fasta_fp=$input_fasta_fp - #if $id_to_taxonomy_fp != None and $id_to_taxonomy_fp.__str__ != 'None' and $id_to_taxonomy_fp.__str__ != '': - --id_to_taxonomy_fp=$id_to_taxonomy_fp - #end if - #if 
$reference_seqs_fp != None and $reference_seqs_fp.__str__ != 'None' and $reference_seqs_fp.__str__ != '': - --reference_seqs_fp=$reference_seqs_fp - #end if - #if $training_data_properties_fp != None and $training_data_properties_fp.__str__ != 'None' and $training_data_properties_fp.__str__ != '': - --training_data_properties_fp.$training_data_properties_fp - #end if - --confidence=$confidence - --assignment_method=rdp - --output_dir='$outputfile.extra_files_path' - - - - - - - - - - - - - - - Only uses RDP. For blast, use MBAC blast tools. - -For more information, see assign_taxonomy_ in the Qiime documentation. - -Updated and validated 01/16/12 - -.. _assign_taxonomy: http://qiime.org/scripts/assign_taxonomy.html - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/beta_diversity.xml --- a/qiime/beta_diversity.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,349 +0,0 @@ - - Calculate beta diversity (pairwise sample dissimilarity) on one or many otu tables - - beta_diversity.py - - - qiime_wrapper.py - --galaxy_outputdir=$__new_file_path__ - #set datasets = [] - #set $path = "" - #if $binary_chisq.__str__ != "None": - #set datasets = $datasets + ["'binary_chisq_.*$:'" + $binary_chisq.__str__] - #if path == "": - #set $path=$binary_chisq.extra_files_path - #end if - #end if - #if $binary_chord.__str__ != "None": - #set datasets = $datasets + ["'binary_chord_.*$:'" + $binary_chord.__str__] - #if path == "": - #set $path=$binary_chord.extra_files_path - #end if - #end if - #if $binary_euclidean.__str__ != "None": - #set datasets = $datasets + ["'binary_euclidean_.*$:'" + $binary_euclidean.__str__] - #if path == "": - #set $path=$binary_euclidean.extra_files_path - #end if - #end if - #if $binary_hamming.__str__ != "None": - #set datasets = $datasets + ["'binary_hamming_.*$:'" + $binary_hamming.__str__] - #if path == "": - #set $path=$binary_hamming.extra_files_path - #end if - #end if - #if $binary_jaccard.__str__ != "None": - #set 
datasets = $datasets + ["'binary_jaccard_.*$:'" + $binary_jaccard.__str__] - #if path == "": - #set $path=$binary_jaccard.extra_files_path - #end if - #end if - #if $binary_lennon.__str__ != "None": - #set datasets = $datasets + ["'binary_lennon_.*$:'" + $binary_lennon.__str__] - #if path == "": - #set $path=$binary_lennon.extra_files_path - #end if - #end if - #if $binary_ochiai.__str__ != "None": - #set datasets = $datasets + ["'binary_ochiai_.*$:'" + $binary_ochiai.__str__] - #if path == "": - #set $path=$binary_ochiai.extra_files_path - #end if - #end if - #if $binary_pearson.__str__ != "None": - #set datasets = $datasets + ["'binary_pearson_.*$:'" + $binary_pearson.__str__] - #if path == "": - #set $path=$binary_pearson.extra_files_path - #end if - #end if - #if $binary_sorensen_dice.__str__ != "None": - #set datasets = $datasets + ["'binary_sorensen_dice_.*$:'" + $binary_sorensen_dice.__str__] - #if path == "": - #set $path=$binary_sorensen.extra_files_path - #end if - #end if - #if $bray_curtis.__str__ != "None": - #set datasets = $datasets + ["'bray_curtis_.*$:'" + $bray_curtis.__str__] - #if path == "": - #set $path=$bray_curtis.extra_files_path - #end if - #end if - #if $canberra.__str__ != "None": - #set datasets = $datasets + ["'canberra_.*$:'" + $canberra.__str__] - #if path == "": - #set $path=$canberra.extra_files_path - #end if - #end if - #if $chisq.__str__ != "None": - #set datasets = $datasets + ["'chisq_.*$:'" + $chisq.__str__] - #if path == "": - #set $path=$binary_euclidean.extra_files_path - #end if - #end if - #if $chord.__str__ != "None": - #set datasets = $datasets + ["'chord_.*$:'" + $chord.__str__] - #if path == "": - #set $path=$chord.extra_files_path - #end if - #end if - #if $euclidean.__str__ != "None": - #set datasets = $datasets + ["'euclidean_.*$:'" + $euclidean.__str__] - #if path == "": - #set $path=$euclidean.extra_files_path - #end if - #end if - #if $gower.__str__ != "None": - #set datasets = $datasets + ["'gower_.*$:'" + 
$gower.__str__] - #if path == "": - #set $path=$gower.extra_files_path - #end if - #end if - #if $hellinger.__str__ != "None": - #set datasets = $datasets + ["'hellinger_.*$:'" + $hellinger.__str__] - #if path == "": - #set $path=$hellinger.extra_files_path - #end if - #end if - #if $kulczynski.__str__ != "None": - #set datasets = $datasets + ["'kulczynski_.*$:'" + $kulczynski.__str__] - #if path == "": - #set $path=$kulczynski.extra_files_path - #end if - #end if - #if $manhattan.__str__ != "None": - #set datasets = $datasets + ["'manhattan_.*$:'" + $manhattan.__str__] - #if path == "": - #set $path=$manhattan.extra_files_path - #end if - #end if - #if $morisita_horn.__str__ != "None": - #set datasets = $datasets + ["'morisita_horn_.*$:'" + $morisita_horn.__str__] - #if path == "": - #set $path=$morisita_horn.extra_files_path - #end if - #end if - #if $pearson.__str__ != "None": - #set datasets = $datasets + ["'pearson_.*$:'" + $pearson.__str__] - #if path == "": - #set $path=$pearson.extra_files_path - #end if - #end if - #if $soergel.__str__ != "None": - #set datasets = $datasets + ["'soergel_.*$:'" + $soergel.__str__] - #if path == "": - #set $path=$soergel.extra_files_path - #end if - #end if - #if $spearman_approx.__str__ != "None": - #set datasets = $datasets + ["'spearman_approx_.*$:'" + $spearman_approx.__str__] - #if path == "": - #set $path=$spearman_approx.extra_files_path - #end if - #end if - #if $specprof.__str__ != "None": - #set datasets = $datasets + ["'specprof_.*$:'" + $specprof.__str__] - #if path == "": - #set $path=$specprof.extra_files_path - #end if - #end if - #if $unifrac.__str__ != "None": - #set datasets = $datasets + ["'unifrac_.*$:'" + $unifrac.__str__] - #if path == "": - #set $path=$unifrac.extra_files_path - #end if - #end if - #if $unifrac_g.__str__ != "None": - #set datasets = $datasets + ["'unifrac_g_.*$:'" + $unifrac_g.__str__] - #if path == "": - #set $path=$unifrac_g.extra_files_path - #end if - #end if - #if 
$unifrac_g_full_tree.__str__ != "None": - #set datasets = $datasets + ["'unifrac_g_full_tree_.*$:'" + $unifrac_g_full_tree.__str__] - #if path == "": - #set $path=$unifrac_g_full_tree.extra_files_path - #end if - #end if - #if $unweighted_unifrac.__str__ != "None": - #set datasets = $datasets + ["'unweighted_unifrac_.*$:'" + $unweighted_unifrac.__str__] - #if path == "": - #set $path=$unweighted_unifrac.extra_files_path - #end if - #end if - #if $unweighted_unifrac_full_tree.__str__ != "None": - #set datasets = $datasets + ["'unweighted_unifrac_full_tree_.*$:'" + $unweighted_unifrac_full_tree.__str__] - #if path == "": - #set $path=$unweighted_unifrac_full_tree.extra_files_path - #end if - #end if - #if $weighted_normalized_unifrac.__str__ != "None": - #set datasets = $datasets + ["'weighted_normalized_unifrac_.*$:'" + $weighted_normalized_unifrac.__str__] - #if path == "": - #set $path=$weighted_normalized_unifrac.extra_files_path - #end if - #end if - #if $weighted_unifrac.__str__ != "None": - #set datasets = $datasets + ["'weighted_unifrac_.*$:'" + $weighted_unifrac.__str__] - #if path == "": - #set $path=$weighted_unifrac.extra_files_path - #end if - #end if - --galaxy_datasets=#echo ','.join($datasets) - --galaxy_new_files_path='$path' - beta_diversity.py - --input_path=$input_path - #if $rows.__str__ != '': - --rows=$rows - #end if - --output_dir=$__new_file_path__ - --metrics=$metrics - #if $tree_path.__str__ != "None" and len($tree_path.__str__) > 0: - --tree_path=$tree_path - #end if - $full_tree - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 'binary_chisq' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'binary_chord' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'binary_euclidean' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'binary_hamming' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'binary_jaccard' in (metrics if isinstance(metrics,list) else 
[metrics]) - - - 'binary_lennon' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'binary_ochiai' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'binary_pearson' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'binary_sorensen_dice' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'bray_curtis' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'canberra' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'chisq' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'chord' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'euclidean' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'gower' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'hellinger' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'kulczynski' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'manhattan' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'morisita_horn' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'pearson' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'soergel' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'spearman_approx' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'specprof' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'unifrac' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'unifrac_g' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'unifrac_g_full_tree' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'unweighted_unifrac' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'unweighted_unifrac_full_tree' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'weighted_normalized_unifrac' in (metrics if isinstance(metrics,list) else [metrics]) - - - 'weighted_unifrac' in (metrics if isinstance(metrics,list) else [metrics]) - - - - - For more information, see beta_diversity_ in the 
Qiime documentation. - -Updated and validated 01/18/12 - - .. _beta_diversity: http://qiime.org/scripts/beta_diversity.html - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/check_id_map.xml --- a/qiime/check_id_map.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,50 +0,0 @@ - - Checks user's metadata mapping file for required data, valid format - - check_id_map.py - - - qiime_wrapper.py - --galaxy_outputdir='$log.extra_files_path' - --galaxy_datasets='^\S+_corrected\.txt$:'$corrected_mapping,'^\S+\.log:'$log - check_id_map.py - --map=$map - --output_dir='$log.extra_files_path' - --char_replace=$char_replace - $not_barcoded - $variable_len_barcodes - $disable_primer_check - $verbose - #if $added_demultiplex_field != None and $added_demultiplex_field.__str__ != "": - --added_demultiplex_field=$added_demultiplex_field - #end if - - - - - - - - - - - - - - - - - For more information, see check_id_map_ in the Qiime documentation. - -Updated and validated 01/19/12 - - .. _check_id_map: http://qiime.org/scripts/check_id_map.html - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/collate_alpha.xml --- a/qiime/collate_alpha.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,31 +0,0 @@ - - Collate alpha diversity results - - collate_alpha.py - - - qiime_wrapper.py - --galaxy_outputdir=$output1.extra_files_path - --galaxy_new_files_path='$__new_file_path__' - --galaxy_logfile=$output1 - --galaxy_new_datasets='^\S+\.txt$:txt' - --galaxy_datasetid=$output1.id - collate_alpha.py - --input_path=$input_path.extra_files_path - --output_path='$__new_file_path__' - - - - - - - - - - - This tool concatenates all the files generated by alpha_diversity in order to generate rarefaction curves. The input is therefore the log file generated by alpha_diversity, and the output is a log file listing all the output files, as well as the files themselves. 
Galaxy must be manually refreshed after running this tool to view all output files. - - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/denoise.xml --- a/qiime/denoise.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,52 +0,0 @@ - - Denoise a flowgram file - - denoise.py - - - qiime_wrapper.py - denoise.py - --input_file=$input_file - --fasta_file=$fasta_file - --output_dir=$__new_file_path__ - --method=$method - $keep_intermediates - --cut-off=$cut_off - --precision=$precision - --num_cpus=$num_cpus - $force_overwrite - --map_fname=$map_fname - --primer=$primer - - - - - - - - - - - - - - - - - - - - - - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/filter_alignment.xml --- a/qiime/filter_alignment.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,49 +0,0 @@ - - Filter sequence alignment by removing highly variable regions - - filter_alignment.py - - - qiime_wrapper.py - ## --galaxy_tmpdir='$__new_file_path__' - --galaxy_outputdir='$pfiltered_fasta.extra_files_path' - --galaxy_datasets='^\S+_pfiltered\.\S+$:'$pfiltered_fasta - filter_alignment.py - --input_fasta_file=$input_fasta_file - --output_dir='$pfiltered_fasta.extra_files_path' - --lane_mask_fp=$lane_mask_fp - $suppress_lane_mask_filter - --allowed_gap_frac=$allowed_gap_frac - $remove_outliers - --threshold=$threshold - #if $entropy_threshold != 0.0: - --entropy_threshold=$entropy_threshold - #end if - - - - - - - - - - - - - - - - For more information, see filter_alignment_ in the Qiime documentation. - -Updated and validated 01/16/12 - -.. 
_filter_alignment: http://qiime.org/scripts/filter_alignment.html - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/identify_chimeric_seqs.xml --- a/qiime/identify_chimeric_seqs.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,72 +0,0 @@ - - Identify chimeric sequences in input FASTA file - - identify_chimeric_seqs.py - - - qiime_wrapper.py - identify_chimeric_seqs.py - --input_fasta_fp=$input_fasta_fp - #if $pick.chimera_detection_method == 'ChimeraSlayer': - --chimera_detection_method=$pick.chimera_detection_method - --aligned_reference_seqs_fp=$pick.aligned_reference_seqs_fp - #if $pick.min_div_ratio.__str__ != '0.0': - --min_div_ratio=$pick.min_div_ratio - #end if - #elif $pick.chimera_detection_method == 'blast_fragments': - --chimera_detection_method=$pick.chimera_detection_method - --id_to_taxonomy_fp=$pick.id_to_taxonomy_fp - #if $pick.blast_db != None and $pick.blast_db.__str__ != "": - --blast_db=$pick.blast_db - #else: - --reference_seqs_fp=$pick.reference_seqs_fp - #end if - --num_fragments=$pick.num_fragments - --taxonomy_depth=$pick.taxonomy_depth - --max_e_value=$pick.max_e_value - #end if - --output_fp=$output_fp - - - - - - - - - - - - - - - - - - - - - - - - - - - - For more information, see identify_chimeric_seqs_ in the Qiime documentation. - -Updated and validated 1/19/12 - -.. 
_identify_chimeric_seqs: http://qiime.org/scripts/identify_chimeric_seqs.html - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/lib/galaxy/datatypes/metagenomics.py --- a/qiime/lib/galaxy/datatypes/metagenomics.py Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1121 +0,0 @@ -""" -metagenomics datatypes -James E Johnson - University of Minnesota -for Mothur -""" - -import data -import logging, os, sys, time, tempfile, shutil, string, glob, re -import galaxy.model -from galaxy.datatypes import metadata -from galaxy.datatypes import tabular -from galaxy.datatypes import sequence -from galaxy.datatypes.metadata import MetadataElement -from galaxy.datatypes.tabular import Tabular -from galaxy.datatypes.sequence import Fasta -from galaxy import util -from galaxy.datatypes.images import Html -from sniff import * - -log = logging.getLogger(__name__) - - -## Mothur Classes - -class Otu( Tabular ): - file_ext = 'otu' - - def sniff( self, filename ): - """ - Determines whether the file is a otu (operational taxonomic unit) format - """ - try: - fh = open( filename ) - count = 0 - while True: - line = fh.readline() - line = line.strip() - if not line: - break #EOF - if line: - if line[0] != '@': - linePieces = line.split('\t') - if len(linePieces) < 2: - return False - try: - check = int(linePieces[1]) - if check + 2 != len(linePieces): - return False - except ValueError: - return False - count += 1 - if count == 5: - return True - fh.close() - if count < 5 and count > 0: - return True - except: - pass - finally: - fh.close() - return False - -class OtuList( Otu ): - file_ext = 'list' - -class Sabund( Otu ): - file_ext = 'sabund' - - def sniff( self, filename ): - """ - Determines whether the file is a otu (operational taxonomic unit) format - labelcount[value(1..n)] - - """ - try: - fh = open( filename ) - count = 0 - while True: - line = fh.readline() - line = line.strip() - if not line: - break #EOF - if line: - if line[0] != '@': - linePieces 
= line.split('\t') - if len(linePieces) < 2: - return False - try: - check = int(linePieces[1]) - if check + 2 != len(linePieces): - return False - for i in range( 2, len(linePieces)): - ival = int(linePieces[i]) - except ValueError: - return False - count += 1 - if count >= 5: - return True - fh.close() - if count < 5 and count > 0: - return True - except: - pass - finally: - fh.close() - return False - -class Rabund( Sabund ): - file_ext = 'rabund' - -class GroupAbund( Otu ): - file_ext = 'grpabund' - def init_meta( self, dataset, copy_from=None ): - Otu.init_meta( self, dataset, copy_from=copy_from ) - def set_meta( self, dataset, overwrite = True, skip=1, max_data_lines = 100000, **kwd ): - # See if file starts with header line - if dataset.has_data(): - try: - fh = open( dataset.file_name ) - line = fh.readline() - line = line.strip() - linePieces = line.split('\t') - if linePieces[0] == 'label' and linePieces[1] == 'Group': - skip=1 - else: - skip=0 - finally: - fh.close() - Otu.set_meta( self, dataset, overwrite, skip, max_data_lines, **kwd) - def sniff( self, filename, vals_are_int=False): - """ - Determines whether the file is a otu (operational taxonomic unit) Shared format - labelgroupcount[value(1..n)] - The first line is column headings as of Mothur v 1.20 - """ - log.info( "sniff GroupAbund vals_are_int %s" % vals_are_int) - try: - fh = open( filename ) - count = 0 - while True: - line = fh.readline() - line = line.strip() - if not line: - break #EOF - if line: - if line[0] != '@': - linePieces = line.split('\t') - if len(linePieces) < 3: - return False - if count > 0 or linePieces[0] != 'label': - try: - check = int(linePieces[2]) - if check + 3 != len(linePieces): - return False - for i in range( 3, len(linePieces)): - if vals_are_int: - ival = int(linePieces[i]) - else: - fval = float(linePieces[i]) - except ValueError: - return False - count += 1 - if count >= 5: - return True - fh.close() - if count < 5 and count > 0: - return True - except: - 
pass - finally: - fh.close() - return False - -class SharedRabund( GroupAbund ): - file_ext = 'shared' - - - def sniff( self, filename ): - """ - Determines whether the file is a otu (operational taxonomic unit) Shared format - labelgroupcount[value(1..n)] - The first line is column headings as of Mothur v 1.20 - """ - # return GroupAbund.sniff(self,filename,True) - isme = GroupAbund.sniff(self,filename,True) - log.info( "is SharedRabund %s" % isme) - return isme - - -class RelAbund( GroupAbund ): - file_ext = 'relabund' - - def sniff( self, filename ): - """ - Determines whether the file is a otu (operational taxonomic unit) Relative Abundance format - labelgroupcount[value(1..n)] - The first line is column headings as of Mothur v 1.20 - """ - # return GroupAbund.sniff(self,filename,False) - isme = GroupAbund.sniff(self,filename,False) - log.info( "is RelAbund %s" % isme) - return isme - -class SecondaryStructureMap(Tabular): - file_ext = 'map' - def __init__(self, **kwd): - """Initialize secondary structure map datatype""" - Tabular.__init__( self, **kwd ) - self.column_names = ['Map'] - - def sniff( self, filename ): - """ - Determines whether the file is a secondary structure map format - A single column with an integer value which indicates the row that this row maps to. - check you make sure is structMap[10] = 380 then structMap[380] = 10. 
- """ - try: - fh = open( filename ) - line_num = 0 - rowidxmap = {} - while True: - line = fh.readline() - line_num += 1 - line = line.strip() - if not line: - break #EOF - if line: - try: - pointer = int(line) - if pointer > 0: - if pointer > line_num: - rowidxmap[line_num] = pointer - elif pointer < line_num & rowidxmap[pointer] != line_num: - return False - except ValueError: - return False - fh.close() - if count < 5 and count > 0: - return True - except: - pass - finally: - fh.close() - return False - -class SequenceAlignment( Fasta ): - file_ext = 'align' - def __init__(self, **kwd): - Fasta.__init__( self, **kwd ) - """Initialize AlignCheck datatype""" - - def sniff( self, filename ): - """ - Determines whether the file is in Mothur align fasta format - Each sequence line must be the same length - """ - - try: - fh = open( filename ) - len = -1 - while True: - line = fh.readline() - if not line: - break #EOF - line = line.strip() - if line: #first non-empty line - if line.startswith( '>' ): - #The next line.strip() must not be '', nor startwith '>' - line = fh.readline().strip() - if line == '' or line.startswith( '>' ): - break - if len < 0: - len = len(line) - elif len != len(line): - return False - else: - break #we found a non-empty line, but its not a fasta header - if len > 0: - return True - except: - pass - finally: - fh.close() - return False - -class AlignCheck( Tabular ): - file_ext = 'align.check' - def __init__(self, **kwd): - """Initialize AlignCheck datatype""" - Tabular.__init__( self, **kwd ) - self.column_names = ['name','pound','dash','plus','equal','loop','tilde','total'] - self.column_types = ['str','int','int','int','int','int','int','int'] - self.comment_lines = 1 - - def set_meta( self, dataset, overwrite = True, **kwd ): - # Tabular.set_meta( self, dataset, overwrite = overwrite, first_line_is_header = True, skip = 1 ) - data_lines = 0 - if dataset.has_data(): - dataset_fh = open( dataset.file_name ) - while True: - line = 
dataset_fh.readline() - if not line: break - data_lines += 1 - dataset_fh.close() - dataset.metadata.comment_lines = 1 - dataset.metadata.data_lines = data_lines - 1 if data_lines > 0 else 0 - dataset.metadata.column_names = self.column_names - dataset.metadata.column_types = self.column_types - -class AlignReport(Tabular): - """ -QueryName QueryLength TemplateName TemplateLength SearchMethod SearchScore AlignmentMethod QueryStart QueryEnd TemplateStart TemplateEnd PairwiseAlignmentLength GapsInQuery GapsInTemplate LongestInsert SimBtwnQuery&Template -AY457915 501 82283 1525 kmer 89.07 needleman 5 501 1 499 499 2 0 0 97.6 - """ - file_ext = 'align.report' - def __init__(self, **kwd): - """Initialize AlignCheck datatype""" - Tabular.__init__( self, **kwd ) - self.column_names = ['QueryName','QueryLength','TemplateName','TemplateLength','SearchMethod','SearchScore', - 'AlignmentMethod','QueryStart','QueryEnd','TemplateStart','TemplateEnd', - 'PairwiseAlignmentLength','GapsInQuery','GapsInTemplate','LongestInsert','SimBtwnQuery&Template' - ] - -class BellerophonChimera( Tabular ): - file_ext = 'bellerophon.chimera' - def __init__(self, **kwd): - """Initialize AlignCheck datatype""" - Tabular.__init__( self, **kwd ) - self.column_names = ['Name','Score','Left','Right'] - -class SecondaryStructureMatch(Tabular): - """ - name pound dash plus equal loop tilde total - 9_1_12 42 68 8 28 275 420 872 - 9_1_14 36 68 6 26 266 422 851 - 9_1_15 44 68 8 28 276 418 873 - 9_1_16 34 72 6 30 267 430 860 - 9_1_18 46 80 2 36 261 - """ - def __init__(self, **kwd): - """Initialize SecondaryStructureMatch datatype""" - Tabular.__init__( self, **kwd ) - self.column_names = ['name','pound','dash','plus','equal','loop','tilde','total'] - -class DistanceMatrix(data.Text): - file_ext = 'dist' - """Add metadata elements""" - MetadataElement( name="sequence_count", default=0, desc="Number of sequences", readonly=False, optional=True, no_value=0 ) - - -class 
LowerTriangleDistanceMatrix(DistanceMatrix): - file_ext = 'lower.dist' - def __init__(self, **kwd): - """Initialize secondary structure map datatype""" - DistanceMatrix.__init__( self, **kwd ) - - def sniff( self, filename ): - """ - Determines whether the file is a lower-triangle distance matrix (phylip) format - The first line has the number of sequences in the matrix. - The remaining lines have the sequence name followed by a list of distances from all preceeding sequences - 5 - U68589 - U68590 0.3371 - U68591 0.3609 0.3782 - U68592 0.4155 0.3197 0.4148 - U68593 0.2872 0.1690 0.3361 0.2842 - """ - try: - fh = open( filename ) - count = 0 - while True: - line = fh.readline() - line = line.strip() - if not line: - break #EOF - if line: - if line[0] != '@': - linePieces = line.split('\t') - if len(linePieces) != 3: - return False - try: - check = float(linePieces[2]) - except ValueError: - return False - count += 1 - if count == 5: - return True - fh.close() - if count < 5 and count > 0: - return True - except: - pass - finally: - fh.close() - return False - -class SquareDistanceMatrix(DistanceMatrix,Tabular): - file_ext = 'square.dist' - sequence_count = -1 - - def __init__(self, **kwd): - """Initialize secondary structure map datatype""" - Tabular.__init__( self, **kwd ) - def init_meta( self, dataset, copy_from=None ): - data.Text.init_meta( self, dataset, copy_from=copy_from ) - def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): - dataset.metadata.sequences = 0 - - def sniff( self, filename ): - """ - Determines whether the file is a square distance matrix (Column-formatted distance matrix) format - The first line has the number of sequences in the matrix. - The following lines have the sequence name in the first column plus a column for the distance to each sequence - in the row order in which they appear in the matrix. 
- 3 - U68589 0.0000 0.3371 0.3610 - U68590 0.3371 0.0000 0.3783 - U68590 0.3371 0.0000 0.3783 - """ - try: - fh = open( filename ) - count = 0 - line = fh.readline() - line = line.strip() - sequence_count = int(line) - col_cnt = seq_cnt + 1 - while True: - line = fh.readline() - line = line.strip() - if not line: - break #EOF - if line: - if line[0] != '@': - linePieces = line.split('\t') - if len(linePieces) != col_cnt : - return False - try: - for i in range(1, col_cnt): - check = float(linePieces[i]) - except ValueError: - return False - count += 1 - if count == 5: - return True - fh.close() - if count < 5 and count > 0: - return True - except: - pass - finally: - fh.close() - return False - -class PairwiseDistanceMatrix(DistanceMatrix,Tabular): - file_ext = 'pair.dist' - def __init__(self, **kwd): - """Initialize secondary structure map datatype""" - Tabular.__init__( self, **kwd ) - self.column_names = ['Sequence','Sequence','Distance'] - self.column_types = ['str','str','float'] - self.comment_lines = 1 - - def sniff( self, filename ): - """ - Determines whether the file is a pairwise distance matrix (Column-formatted distance matrix) format - The first and second columns have the sequence names and the third column is the distance between those sequences. 
- """ - try: - fh = open( filename ) - count = 0 - while True: - line = fh.readline() - line = line.strip() - if not line: - break #EOF - if line: - if line[0] != '@': - linePieces = line.split('\t') - if len(linePieces) != 3: - return False - try: - check = float(linePieces[2]) - except ValueError: - return False - count += 1 - if count == 5: - return True - fh.close() - if count < 5 and count > 0: - return True - except: - pass - finally: - fh.close() - return False - -class AlignCheck(Tabular): - file_ext = 'align.check' - def __init__(self, **kwd): - """Initialize secondary structure map datatype""" - Tabular.__init__( self, **kwd ) - self.column_names = ['name','pound','dash','plus','equal','loop','tilde','total'] - self.columns = 8 - -class Names(Tabular): - file_ext = 'names' - def __init__(self, **kwd): - """Name file shows the relationship between a representative sequence(col 1) and the sequences(comma-separated) it represents(col 2)""" - Tabular.__init__( self, **kwd ) - self.column_names = ['name','representatives'] - self.columns = 2 - -class Summary(Tabular): - file_ext = 'summary' - def __init__(self, **kwd): - """summarizes the quality of sequences in an unaligned or aligned fasta-formatted sequence file""" - Tabular.__init__( self, **kwd ) - self.column_names = ['seqname','start','end','nbases','ambigs','polymer'] - self.columns = 6 - -class Group(Tabular): - file_ext = 'groups' - def __init__(self, **kwd): - """Name file shows the relationship between a representative sequence(col 1) and the sequences it represents(col 2)""" - Tabular.__init__( self, **kwd ) - self.column_names = ['name','group'] - self.columns = 2 - -class Design(Tabular): - file_ext = 'design' - def __init__(self, **kwd): - """Name file shows the relationship between a group(col 1) and a grouping (col 2), providing a way to merge groups.""" - Tabular.__init__( self, **kwd ) - self.column_names = ['group','grouping'] - self.columns = 2 - -class AccNos(Tabular): - file_ext = 
'accnos' - def __init__(self, **kwd): - """A list of names""" - Tabular.__init__( self, **kwd ) - self.column_names = ['name'] - self.columns = 1 - -class Oligos( data.Text ): - file_ext = 'oligos' - - def sniff( self, filename ): - """ - Determines whether the file is a otu (operational taxonomic unit) format - """ - try: - fh = open( filename ) - count = 0 - while True: - line = fh.readline() - line = line.strip() - if not line: - break #EOF - else: - if line[0] != '#': - linePieces = line.split('\t') - if len(linePieces) == 2 and re.match('forward|reverse',linePieces[0]): - count += 1 - continue - elif len(linePieces) == 3 and re.match('barcode',linePieces[0]): - count += 1 - continue - else: - return False - if count > 20: - return True - if count > 0: - return True - except: - pass - finally: - fh.close() - return False - -class Frequency(Tabular): - file_ext = 'freq' - def __init__(self, **kwd): - """A list of names""" - Tabular.__init__( self, **kwd ) - self.column_names = ['position','frequency'] - self.column_types = ['int','float'] - - def sniff( self, filename ): - """ - Determines whether the file is a frequency tabular format for chimera analysis - #1.14.0 - 0 0.000 - 1 0.000 - ... 
- 155 0.975 - """ - try: - fh = open( filename ) - count = 0 - while True: - line = fh.readline() - line = line.strip() - if not line: - break #EOF - else: - if line[0] != '#': - try: - linePieces = line.split('\t') - i = int(linePieces[0]) - f = float(linePieces[1]) - count += 1 - continue - except: - return False - if count > 20: - return True - if count > 0: - return True - except: - pass - finally: - fh.close() - return False - -class Quantile(Tabular): - file_ext = 'quan' - MetadataElement( name="filtered", default=False, no_value=False, optional=True , desc="Quantiles calculated using a mask", readonly=True) - MetadataElement( name="masked", default=False, no_value=False, optional=True , desc="Quantiles calculated using a frequency filter", readonly=True) - def __init__(self, **kwd): - """Quantiles for chimera analysis""" - Tabular.__init__( self, **kwd ) - self.column_names = ['num','ten','twentyfive','fifty','seventyfive','ninetyfive','ninetynine'] - self.column_types = ['int','float','float','float','float','float','float'] - def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): - log.info( "Mothur Quantile set_meta %s" % kwd) - def sniff( self, filename ): - """ - Determines whether the file is a quantiles tabular format for chimera analysis - 1 0 0 0 0 0 0 - 2 0.309198 0.309198 0.37161 0.37161 0.37161 0.37161 - 3 0.510982 0.563213 0.693529 0.858939 1.07442 1.20608 - ... 
- """ - try: - fh = open( filename ) - count = 0 - while True: - line = fh.readline() - line = line.strip() - if not line: - break #EOF - else: - if line[0] != '#': - try: - linePieces = line.split('\t') - i = int(linePieces[0]) - f = float(linePieces[1]) - f = float(linePieces[2]) - f = float(linePieces[3]) - f = float(linePieces[4]) - f = float(linePieces[5]) - f = float(linePieces[6]) - count += 1 - continue - except: - return False - if count > 10: - return True - if count > 0: - return True - except: - pass - finally: - fh.close() - return False - -class FilteredQuantile(Quantile): - file_ext = 'filtered.quan' - def __init__(self, **kwd): - """Quantiles for chimera analysis""" - Quantile.__init__( self, **kwd ) - self.filtered = True - -class MaskedQuantile(Quantile): - file_ext = 'masked.quan' - def __init__(self, **kwd): - """Quantiles for chimera analysis""" - Quantile.__init__( self, **kwd ) - self.masked = True - self.filtered = False - -class FilteredMaskedQuantile(Quantile): - file_ext = 'filtered.masked.quan' - def __init__(self, **kwd): - """Quantiles for chimera analysis""" - Quantile.__init__( self, **kwd ) - self.masked = True - self.filtered = True - -class LaneMask(data.Text): - file_ext = 'filter' - - def sniff( self, filename ): - """ - Determines whether the file is a lane mask filter: 1 line consisting of zeros and ones. 
- """ - try: - fh = open( filename ) - while True: - buff = fh.read(1000) - if not buff: - break #EOF - else: - if not re.match('^[01]+$',line): - return False - return True - except: - pass - finally: - close(fh) - return False - -class SequenceTaxonomy(Tabular): - file_ext = 'seq.taxonomy' - """ - A table with 2 columns: - - SequenceName - - Taxonomy (semicolon-separated taxonomy in descending order) - Example: - X56533.1 Eukaryota;Alveolata;Ciliophora;Intramacronucleata;Oligohymenophorea;Hymenostomatida;Tetrahymenina;Glaucomidae;Glaucoma; - X97975.1 Eukaryota;Parabasalidea;Trichomonada;Trichomonadida;unclassified_Trichomonadida; - AF052717.1 Eukaryota;Parabasalidea; - """ - def __init__(self, **kwd): - Tabular.__init__( self, **kwd ) - self.column_names = ['name','taxonomy'] - - def sniff( self, filename ): - """ - Determines whether the file is a SequenceTaxonomy - """ - try: - pat = '^([^ \t\n\r\f\v;]+([(]\d+[)])?[;])+$' - fh = open( filename ) - count = 0 - while True: - line = fh.readline() - if not line: - break #EOF - line = line.strip() - if line: - fields = line.split('\t') - if len(fields) != 2: - return False - if not re.match(pat,fields[1]): - return False - count += 1 - if count > 10: - break - if count > 0: - return True - except: - pass - finally: - fh.close() - return False - -class RDPSequenceTaxonomy(SequenceTaxonomy): - file_ext = 'rdp.taxonomy' - """ - A table with 2 columns: - - SequenceName - - Taxonomy (semicolon-separated taxonomy in descending order, RDP requires exactly 6 levels deep) - Example: - AB001518.1 Bacteria;Bacteroidetes;Sphingobacteria;Sphingobacteriales;unclassified_Sphingobacteriales; - AB001724.1 Bacteria;Cyanobacteria;Cyanobacteria;Family_II;GpIIa; - AB001774.1 Bacteria;Chlamydiae;Chlamydiae;Chlamydiales;Chlamydiaceae;Chlamydophila; - """ - def sniff( self, filename ): - """ - Determines whether the file is a SequenceTaxonomy - """ - try: - pat = '^([^ \t\n\r\f\v;]+([(]\d+[)])?[;]){6}$' - fh = open( filename ) - count = 0 
- while True: - line = fh.readline() - if not line: - break #EOF - line = line.strip() - if line: - fields = line.split('\t') - if len(fields) != 2: - return False - if not re.match(pat,fields[1]): - return False - count += 1 - if count > 10: - break - if count > 0: - return True - except: - pass - finally: - fh.close() - return False - -class ConsensusTaxonomy(Tabular): - file_ext = 'cons.taxonomy' - def __init__(self, **kwd): - """A list of names""" - Tabular.__init__( self, **kwd ) - self.column_names = ['OTU','count','taxonomy'] - -class TaxonomySummary(Tabular): - file_ext = 'tax.summary' - def __init__(self, **kwd): - """A Summary of taxon classification""" - Tabular.__init__( self, **kwd ) - self.column_names = ['taxlevel','rankID','taxon','daughterlevels','total'] - -class Phylip(data.Text): - file_ext = 'phy' - - def sniff( self, filename ): - """ - Determines whether the file is in Phylip format (Interleaved or Sequential) - The first line of the input file contains the number of species and the - number of characters, in free format, separated by blanks (not by - commas). The information for each species follows, starting with a - ten-character species name (which can include punctuation marks and blanks), - and continuing with the characters for that species. - http://evolution.genetics.washington.edu/phylip/doc/main.html#inputfiles - Interleaved Example: - 6 39 - Archaeopt CGATGCTTAC CGCCGATGCT - HesperorniCGTTACTCGT TGTCGTTACT - BaluchitheTAATGTTAAT TGTTAATGTT - B. 
virginiTAATGTTCGT TGTTAATGTT - BrontosaurCAAAACCCAT CATCAAAACC - B.subtilisGGCAGCCAAT CACGGCAGCC - - TACCGCCGAT GCTTACCGC - CGTTGTCGTT ACTCGTTGT - AATTGTTAAT GTTAATTGT - CGTTGTTAAT GTTCGTTGT - CATCATCAAA ACCCATCAT - AATCACGGCA GCCAATCAC - """ - try: - fh = open( filename ) - # counts line - line = fh.readline().strip() - linePieces = line.split() - count = int(linePieces[0]) - seq_len = int(linePieces[1]) - # data lines - """ - TODO check data lines - while True: - line = fh.readline() - # name is the first 10 characters - name = line[0:10] - seq = line[10:].strip() - # nucleic base or amino acid 1-char designators (spaces allowed) - bases = ''.join(seq.split()) - # float per base (each separated by space) - """ - return True - except: - pass - finally: - close(fh) - return False - - -class Axes(Tabular): - file_ext = 'axes' - - def __init__(self, **kwd): - """Initialize axes datatype""" - Tabular.__init__( self, **kwd ) - def sniff( self, filename ): - """ - Determines whether the file is an axes format - The first line may have column headings. - The following lines have the name in the first column plus float columns for each axis. 
- ==> 98_sq_phylip_amazon.fn.unique.pca.axes <== - group axis1 axis2 - forest 0.000000 0.145743 - pasture 0.145743 0.000000 - - ==> 98_sq_phylip_amazon.nmds.axes <== - axis1 axis2 - U68589 0.262608 -0.077498 - U68590 0.027118 0.195197 - U68591 0.329854 0.014395 - """ - try: - fh = open( filename ) - count = 0 - line = fh.readline() - line = line.strip() - col_cnt = None - while True: - line = fh.readline() - line = line.strip() - if not line: - break #EOF - if line: - fields = line.split('\t') - if col_cnt == None: # ignore values in first line as they may be column headings - col_cnt = len(fields) - else: - if len(fields) != col_cnt : - return False - try: - for i in range(1, col_cnt): - check = float(fields[i]) - except ValueError: - return False - count += 1 - if count > 10: - return True - if count > 0: - return True - except: - pass - finally: - fh.close() - return False - -## Qiime Classes - -class QiimeMetadataMapping(Tabular): - MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] ) - file_ext = 'qiimemapping' - - def __init__(self, **kwd): - """ - http://qiime.sourceforge.net/documentation/file_formats.html#mapping-file-overview - Information about the samples necessary to perform the data analysis. 
- # self.column_names = ['#SampleID','BarcodeSequence','LinkerPrimerSequence','Description'] - """ - Tabular.__init__( self, **kwd ) - - def sniff( self, filename ): - """ - Determines whether the file is a qiime mapping file - Just checking for an appropriate header line for now, could be improved - """ - try: - pat = '#SampleID(\t[a-zA-Z][a-zA-Z0-9_]*)*\tDescription' - fh = open( filename ) - while True: - line = dataset_fh.readline() - if re.match(pat,line): - return True - except: - pass - finally: - close(fh) - return False - - def set_column_names(self, dataset): - if dataset.has_data(): - dataset_fh = open( dataset.file_name ) - line = dataset_fh.readline() - if line.startswith('#SampleID'): - dataset.metadata.column_names = line.strip().split('\t'); - dataset_fh.close() - - def set_meta( self, dataset, overwrite = True, skip = None, max_data_lines = None, **kwd ): - Tabular.set_meta(self, dataset, overwrite, skip, max_data_lines) - self.set_column_names(dataset) - -class QiimeOTU(Tabular): - """ - Associates OTUs with sequence IDs - Example: - 0 FLP3FBN01C2MYD FLP3FBN01B2ALM - 1 FLP3FBN01DF6NE FLP3FBN01CKW1J FLP3FBN01CHVM4 - 2 FLP3FBN01AXQ2Z - """ - file_ext = 'qiimeotu' - -class QiimeOTUTable(Tabular): - """ - #Full OTU Counts - #OTU ID PC.354 PC.355 PC.356 Consensus Lineage - 0 0 1 0 Root;Bacteria;Firmicutes;"Clostridia";Clostridiales - 1 1 3 1 Root;Bacteria - 2 0 2 2 Root;Bacteria;Bacteroidetes - """ - MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] ) - file_ext = 'qiimeotutable' - def init_meta( self, dataset, copy_from=None ): - tabular.Tabular.init_meta( self, dataset, copy_from=copy_from ) - def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): - self.set_column_names(dataset) - def set_column_names(self, dataset): - if dataset.has_data(): - dataset_fh = open( dataset.file_name ) - line = dataset_fh.readline() - line = dataset_fh.readline() - if line.startswith('#OTU 
ID'): - dataset.metadata.column_names = line.strip().split('\t'); - dataset_fh.close() - dataset.metadata.comment_lines = 2 - -class QiimeDistanceMatrix(Tabular): - """ - PC.354 PC.355 PC.356 - PC.354 0.0 3.177 1.955 - PC.355 3.177 0.0 3.444 - PC.356 1.955 3.444 0.0 - """ - file_ext = 'qiimedistmat' - def init_meta( self, dataset, copy_from=None ): - tabular.Tabular.init_meta( self, dataset, copy_from=copy_from ) - def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): - self.set_column_names(dataset) - def set_column_names(self, dataset): - if dataset.has_data(): - dataset_fh = open( dataset.file_name ) - line = dataset_fh.readline() - # first line contains the names - dataset.metadata.column_names = line.strip().split('\t'); - dataset_fh.close() - dataset.metadata.comment_lines = 1 - -class QiimePCA(Tabular): - """ - Principal Coordinate Analysis Data - The principal coordinate (PC) axes (columns) for each sample (rows). - Pairs of PCs can then be graphed to view the relationships between samples. - The bottom of the output file contains the eigenvalues and % variation explained for each PC. - Example: - pc vector number 1 2 3 - PC.354 -0.309063936588 0.0398252112257 0.0744672231759 - PC.355 -0.106593922619 0.141125998277 0.0780204374172 - PC.356 -0.219869362955 0.00917241121781 0.0357281314115 - - - eigvals 0.480220500471 0.163567082874 0.125594470811 - % variation explained 51.6955484555 17.6079322939 - """ - file_ext = 'qiimepca' - -class QiimeParams(Tabular): - """ -###pick_otus_through_otu_table.py parameters### - -# OTU picker parameters -pick_otus:otu_picking_method uclust -pick_otus:clustering_algorithm furthest - -# Representative set picker parameters -pick_rep_set:rep_set_picking_method first -pick_rep_set:sort_by otu - """ - file_ext = 'qiimeparams' - -class QiimePrefs(data.Text): - """ - A text file, containing coloring preferences to be used by make_distance_histograms.py, make_2d_plots.py and make_3d_plots.py. 
- Example: -{ -'background_color':'black', - -'sample_coloring': - { - 'Treatment': - { - 'column':'Treatment', - 'colors':(('red',(0,100,100)),('blue',(240,100,100))) - }, - 'DOB': - { - 'column':'DOB', - 'colors':(('red',(0,100,100)),('blue',(240,100,100))) - } - }, -'MONTE_CARLO_GROUP_DISTANCES': - { - 'Treatment': 10, - 'DOB': 10 - } -} - """ - file_ext = 'qiimeprefs' - -class QiimeTaxaSummary(Tabular): - """ - Taxon PC.354 PC.355 PC.356 - Root;Bacteria;Actinobacteria 0.0 0.177 0.955 - Root;Bacteria;Firmicutes 0.177 0.0 0.444 - Root;Bacteria;Proteobacteria 0.955 0.444 0.0 - """ - MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] ) - file_ext = 'qiimetaxsummary' - - def set_column_names(self, dataset): - if dataset.has_data(): - dataset_fh = open( dataset.file_name ) - line = dataset_fh.readline() - if line.startswith('Taxon'): - dataset.metadata.column_names = line.strip().split('\t'); - dataset_fh.close() - - def set_meta( self, dataset, overwrite = True, skip = None, max_data_lines = None, **kwd ): - Tabular.set_meta(self, dataset, overwrite, skip, max_data_lines) - self.set_column_names(dataset) - -if __name__ == '__main__': - import doctest, sys - doctest.testmod(sys.modules[__name__]) - diff -r 2c9714f56480 -r d80000f5ad20 qiime/make_2d_plots.xml --- a/qiime/make_2d_plots.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,64 +0,0 @@ - - Make 2D PCoA Plots - - make_2d_plots.py - - - qiime_wrapper.py - --galaxy_outputdir=$plot.extra_files_path - ##--galaxy_tmpdir='$__new_file_path__' - ##--galaxy_datasets='^\S+_2D_PCoA_plots\.html$:'$plot - --galaxy_datasets='^\S+\.html$:'$plot - ##--galaxy_datasetid=$output1.id - ##--galaxy_new_files_path='$__new_file_path__' - ##--galaxy_tmpdir='$__new_file_path__' - make_2d_plots.py - --coord_fname=$coord_fname - --map_fname=$map_fname - #if $colorby != None and $colorby.__str__ != 'None': - --colorby=$colorby - #end if - #if 
$prefs_path != None and $prefs_path.__str__ != 'None' and len($prefs_path.__str__) > 0: - --prefs_path=$prefs_path - #end if - --background_color=$background_color - --ellipsoid_opacity=$ellipsoid_opacity - --ellipsoid_method=$ellipsoid_method - #if $master_pcoa != None and $master_pcoa.__str__ != 'None' and len($master_pcoa.__str__) > 0: - --master_pcoa=$master_pcoa - #end if - --output_dir=$plot.extra_files_path - - - - - - - - - - - - - - - - - - - - For more information, see make_2d_plots_ in the Qiime documentation. - -Updated and validated 01/18/12 - - .. _make_2d_plots: http://qiime.org/scripts/make_2d_plots.html - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/make_3d_plots.xml --- a/qiime/make_3d_plots.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,131 +0,0 @@ - - Make 3D PCoA plots - - make_3d_plots.py - - - qiime_wrapper.py - --galaxy_outputdir=$plot.extra_files_path - --galaxy_datasets='^\S+\.html$:'$plot - make_3d_plots.py - --coord_fname=$coord_fname - --map_fname=$map_fname - #if $colorby != None and $colorby.__str__ != 'None': - --colorby=$colorby - #end if - #if $custom_axes != None and $custom_axes.__str__ != 'None': - --custom_axes=$custom_axes - #end if - #if $prefs_path != None and $prefs_path.__str__ != 'None' and len($prefs_path.__str__) > 0: - --prefs_path=$prefs_path - #end if - --background_color=$background_color - --output_dir=$__new_file_path__ - --ellipsoid_smoothness=$ellipsoid_smoothness - --ellipsoid_opacity=$ellipsoid_opacity - --ellipsoid_method=$ellipsoid_method - #if $taxa_fname != None and $taxa_fname.__str__ != "": - --taxa_fname=$taxa_fname - #end if - --n_taxa_keep=$n_taxa_keep - #if $biplot_output_file != None and $biplot_output_file.__str__ != "": - --biplot_output_file=$biplot_output_file - #end if - #if $master_pcoa != None and $master_pcoa.__str__ != 'None' and len($master_pcoa.__str__) > 0: - --master_pcoa=$master_pcoa - #end if - --output_format=$output_format - 
--interpolation_points=$interpolation_points - --polyhedron_points=$polyhedron_points - --polyhedron_offset=$polyhedron_offset - #if $add_vectors2.__str__ != "" and $add_vectors1.__str__ != "": - ##--add_vectors='$add_vectors1','$addvectors2' - --add_vectors=#echo ','.join($add_vectors1,$add_vectors2) - #else if $add_vectors1.__str__ != "": - --add_vectors=$add_vectors1 - #end if - #if $rms_algorithm.__str__ != "None" and len($add_vectors1.__str__) > 0: - --rms_algorithm=$rms_algorithm - #end if - #if $rms_path != None and $rms_path.__str__ != "" and len($rms_path.__str__) > 0 and len($add_vectors1.__str__) > 0: - --rms_path=$rms_path - #end if - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -weighted_unifrac_pc.txt_3D.html - - -]]> - - - - - - - - - - - .. class:: warningmark: NOTE: Make 3D Plots is not validated. Use at your own risk. - - For more information, see make_3d_plots_ in the Qiime documentation. - - .. _make_3d_plots: http://qiime.org/scripts/make_3d_plots.html - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/make_3d_plots_old.xml --- a/qiime/make_3d_plots_old.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,89 +0,0 @@ - - Make 3D PCoA plots - - make_3d_plots.py - - - qiime_wrapper.py - --galaxy_summary_html='$output_html' - --galaxy_outputdir='$output_html.extra_files_path' - --galaxy_summary_template='$output_template' - make_3d_plots.py - --coord_fname=$coord_fname - --map_fname=$map_fname - --colorby=$colorby - --custom_axes=$custom_axes - --prefs_path=$prefs_path - --background_color=$background_color - --output_dir=$__new_file_path__ - --ellipsoid_smoothness=$ellipsoid_smoothness - --ellipsoid_opacity=$ellipsoid_opacity - --ellipsoid_method=$ellipsoid_method - --taxa_fname=$taxa_fname - --n_taxa_keep=$n_taxa_keep - --biplot_output_file=$biplot_output_file - --master_pcoa=$master_pcoa - --output_format=$output_format - --interpolation_points=$interpolation_points - 
--polyhedron_points=$polyhedron_points - --polyhedron_offset=$polyhedron_offset - - - - - - - - - - - - - - - - - - - - - - - - - - - -weighted_unifrac_pc.txt_3D.html - - -]]> - - - - - - - - - - - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/make_distance_histograms.xml --- a/qiime/make_distance_histograms.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,54 +0,0 @@ - - Make distance histograms - - make_distance_histograms.py - - - qiime_wrapper.py - --galaxy_outputdir=$plot.extra_files_path - --galaxy_datasets='^\S+\.html$:'$plot - make_distance_histograms.py - --distance_matrix_file=$distance_matrix_file - --map_fname=$map_fname - #if $prefs_path != None and $prefs_path.__str__ != 'None' and len($prefs_path.__str__) > 0: - --prefs_path=$prefs_path - #end if - --dir_path=$plot.extra_files_path - --background_color=$background_color - $monte_carlo - #if $fields != None and $fields.__str__ != ' ' and $fields.__str__ !='': - --fields=$fields - #end if - --monte_carlo_iters=$monte_carlo_iters - - - - - - - - - - - - - - - - - - - For more information, see make_distance_histograms_ in the Qiime documentation. - -Updated and validated 01/18/12 - - .. _make_distance_histograms: http://qiime.org/scripts/make_distance_histograms.html - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/make_otu_table.xml --- a/qiime/make_otu_table.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,37 +0,0 @@ - - Make OTU table - - make_otu_table.py - - - qiime_wrapper.py - make_otu_table.py - --otu_map_fp=$otu_map_fp - --output_fp=$output_fp - #if $taxonomy.__str__ != 'None': - --taxonomy=$taxonomy - #end if - #if $exclude_otus_fp.__str__ != 'None': - --exclude_otus_fp=$exclude_otus_fp - #end if - - - - - - - - - - - - For more information, see make_otu_table_ in the Qiime documentation. - -Updated and validated 01/16/12 - -.. 
_make_otu_table: http://qiime.org/scripts/make_otu_table - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/make_phylogeny.xml --- a/qiime/make_phylogeny.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,47 +0,0 @@ - - Make Phylogeny - - make_phylogeny.py - - - qiime_wrapper.py - --galaxy_inputdir='$__new_file_path__' - --galaxy_ext_change='$input_fp' - --galaxy_new_ext='fasta' - make_phylogeny.py - --input_fp='$__new_file_path__'/temporary.fasta - --tree_method=$tree_method - --result_fp=$result_fp - --log_fp=$log_fp - --root_method=$root_method - - - - - - - - - - - - - - - - - - - - - - For more information, see make_phylogeny_ in the Qiime documentation. - -Updated and validated 01/16/12 - -.. _make_phylogeny: http://qiime.org/scripts/make_phylogeny.html - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/make_prefs_file.xml --- a/qiime/make_prefs_file.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,52 +0,0 @@ - - Generate preferences file - - make_prefs_file.py - - - qiime_wrapper.py - make_prefs_file.py - --map_fname=$map_fname - --output_fp=$output_fp - #if $mapping_headers_to_use != None and $mapping_headers_to_use.__str__ != '': - --mapping_headers_to_use=$mapping_headers_to_use - #end if - --background_color=$background_color - --monte_carlo_dists=$monte_carlo_dists - #if $input_taxa_file != None and $input_taxa_file.__str__ != '' and $input_taxa_file.__str__ != 'None': - --input_taxa_file=$input_taxa_file - #end if - --ball_scale=$ball_scale - --arrow_line_color=$arrow_line_color - --arrow_head_color=$arrow_head_color - - - - - - - - - - - - - - - - - For more information, see make_prefs_file_ in the Qiime documentation. - -Updated and validated 01/18/12 - - .. 
_make_prefs_file: http://qiime.org/scripts/make_prefs_file.html - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/multiple_rarefactions.xml --- a/qiime/multiple_rarefactions.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,44 +0,0 @@ - - Perform multiple subsamplings/rarefactions on an otu table - - multiple_rarefactions.py - - - qiime_wrapper.py - --galaxy_logfile=$output1 - --galaxy_outputdir=$output1.extra_files_path - multiple_rarefactions.py - --input_path=$input_path - --output_path=$output1.extra_files_path - --min=$min - --max=$max - --step=$step - --num-reps=$num_reps - $lineages_included - $keep_empty_otus - - - - - - - - - - - - - - - - This tool rarefies OTU tables for use in jackknife, bootstrap, and rarefaction analyses. Samples with fewer sequences than the rarefaction depth requested for a given output otu table are omitted from those otu tables. The input is an OTU table (e.g., the output from make_otu_table). The output file is a log file listing all the rarefied otu tables produced. - - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/per_library_stats.xml --- a/qiime/per_library_stats.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,33 +0,0 @@ - - Calculate per library statistics - - per_library_stats.py - - - qiime_wrapper.py - --galaxy_logfile=$logfile - per_library_stats.py - --otu_table_fp=$otu_table_fp - #if $mapfile != None and $mapfile.__str__ != 'None' and $mapfile.__str__ != '': - --mapfile=$mapfile - #end if - --outputfile=$outputfile - - - - - - - - - - - - For more information, see per_library_stats_ in the Qiime documentation. - -Updated and validated 01/18/12 - - .. 
_per_library_stats: http://qiime.org/scripts/per_library_stats.html - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/pick_otus.xml --- a/qiime/pick_otus.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,169 +0,0 @@ - - OTU picking - - pick_otus.py - - - qiime_wrapper.py - --galaxy_outputdir='$log.extra_files_path' - #if $pick.otu_picking_method == 'uclust' and $pick.refseqs_fp.__str__ != 'None': - --galaxy_datasets='^\S+_otus\.txt$:'$otus,'^\S+_otus\.log$:'$log,'^\S+_failures\.txt$:'$failures - #else: - --galaxy_datasets='^\S+_otus\.txt$:'$otus,'^\S+_otus\.log$:'$log - #end if - pick_otus.py - --input_seqs_filepath=$input_seqs_filepath - #if $pick.otu_picking_method.__str__ == 'uclust': - #if $pick.refseqs_fp.__str__ != 'None': - --refseqs_fp=$pick.refseqs_fp - --otu_picking_method='uclust_ref' - $pick.suppress_new_clusters - #else: - --otu_picking_method=$pick.otu_picking_method - #end if - --similarity=$pick.similarity - $pick.enable_rev_strand_match - $pick.optimal_uclust - $pick.exact_uclust - $pick.user_sort - $pick.suppress_presort_by_abundance_uclust - --max_accepts=$pick.max_accepts - --max_rejects=$pick.max_rejects - #if $pick.uclust_otu_id_prefix != None and $pick.uclust_otu_id_prefix.__str__ != 'None' and $pick.uclust_otu_id_prefix.__str__ != '': - --uclust_otu_id_prefix=$pick.uclust_otu_id_prefix - #end if - $pick.uclust_stable_sort - $pick.save_uc_files - #elif $pick.otu_picking_method.__str__ == 'mothur': - --otu_picking_method=$pick.otu_picking_method - --clustering_algorithm=$pick.clustering_algorithm - --similarity=$pick.similarity - #elif $pick.otu_picking_method.__str__ == 'trie': - --otu_picking_method=$pick.otu_picking_method - $pick.trie_reverse_seqs - #elif $pick.otu_picking_method.__str__ == 'prefix_suffix': - --otu_picking_method=$pick.otu_picking_method - --prefix_length=$pick.prefix_length - --suffix_length=$pick.suffix_length - #elif pick.otu_picking_method.__str__ == 'blast': - 
--otu_picking_method=$pick.otu_picking_method - #if $refseqs_fp.__str__ != 'None': - --refseqs_fp=$pick.refseqs_fp - #end if - #if $pick.blast_db != None and $pick.blast_db.__str__ != 'None' and $pick.blast_db.__str__ != '': - --blast_db=$pick.blast_db - #end if - --similarity=$pick.similarity - --max_e_value=$pick.max_e_value - --min_aligned_percent=$pick.min_aligned_percent - #elif $pick.otu_picking_method == 'cdhit': - --otu_picking_method=$pick.otu_picking_method - --similarity=$pick.similarity - --max_cdhit_memory=$pick.max_cdhit_memory - #if $pick.prefix_prefilter_length != 0: - --prefix_prefilter_length=$pick.prefix_prefilter_length - #end if - $pick.trie_prefilter - #end if - --output_dir='$log.extra_files_path' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - (pick['otu_picking_method'] == 'uclust' and pick['refseqs_fp']) - - - - - For more information, see pick_otus_ in the Qiime documentation. - -Updated and validated 01/16/12 - -.. _pick_otus: http://qiime.org/scripts/pick_otus.html - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/pick_rep_set.xml --- a/qiime/pick_rep_set.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,51 +0,0 @@ - - Pick representative set of sequences - - pick_rep_set.py - - - qiime_wrapper.py - pick_rep_set.py - --input_file=$input_file - #if $reference_seqs_fp.__str__ != 'None' or $reference_seqs_fp != None and $reference_seqs_fp.__str__ == '': - --reference_seqs_fp=$reference_seqs_fp - #else: - --fasta_file=$fasta_file - #end if - --rep_set_picking_method=$rep_set_picking_method - --sort_by=$sort_by - --log_fp=$log_fp - --result_fp=$result_fp - - - - - - - - - - - - - - - - - - - - - - - For more information, see pick_rep_set_ in the Qiime documentation. - -Updated and validated 01/16/12 - -.. 
_pick_rep_set: http://qiime.org/scripts/pick_rep_set.html - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/plot_taxa_summary.xml --- a/qiime/plot_taxa_summary.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,97 +0,0 @@ - - Make taxaonomy summary charts based on taxonomy assignment - - plot_taxa_summary.py - - - qiime_wrapper.py - --galaxy_outputdir='$taxonomy_summary_chart.extra_files_path' - --galaxy_datasets='^\S+_charts\.html$:'$taxonomy_summary_chart - plot_taxa_summary.py - #set $counts = [] - #for i in $inputs: - #set $counts = $counts + [$i.counts_fname.__str__] - #end for - --counts_fname=#echo ','.join($counts) - #if $labels != None and $labels.__str__ != 'None' and $labels.__str__ != '': - --labels=$labels - #end if - --num_categories=$num_categories - #if $colorby != None and $colorby.__str__ != 'None' and $colorby.__str__ != '': - --colorby=$colorby - #end if - #if $prefs_path != None and $prefs_path.__str__ != 'None': - --prefs_path=$prefs_path - #end if - --background_color=$background_color - --dpi=$dpi - --x_width=$x_width - --y_height=$y_height - --bar_width=$bar_width - --type_of_file=$type_of_file - --chart_type=$chart_type - --resize_nth_label=$resize_nth_label - $include_html_legend - $include_html_counts - --label_type=$label_type - --dir_path='$taxonomy_summary_chart.extra_files_path' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - For more information, see plot_taxa_summary_ in the Qiime documentation. - -Updated and validated 01/20/12 - - .. 
_plot_taxa_summary: http://qiime.org/scripts/plot_taxa_summary.html - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/principal_coordinates.xml --- a/qiime/principal_coordinates.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,51 +0,0 @@ - - Principal Coordinates Analysis (PCoA) - - principal_coordinates.py - - - qiime_wrapper.py - #if $run_type.input_type.__str__ == 'multi': - --galaxy_new_datasets='^\S+\.txt$:txt' - --galaxy_new_files_path='$__new_file_path__' - --galaxy_datasetid=$output_path.id - --galaxy_logfile=$logfile - #end if - principal_coordinates.py - #if $run_type.input_type.__str__ == "multi": - --input_path=$input_path.extra_files_path - --output_path='$__new_file_path__' - #else: - --input_path=$input_path - --output_path=$output_path - #end if - - - - - - - - - - - - - - - - - - - - - - For more information, see principle_coordinates_ in the Qiime documentation. - - Updated and validated 01/18/12 - - .. _principle_coordinates: http://qiime.org/scripts/principal_coordinates.html - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/process_sff.xml --- a/qiime/process_sff.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,34 +0,0 @@ - - Convert sff to FASTA and QUAL files - - process_sff.py - - - qiime_wrapper.py - process_sff.py - --input_dir=$input_dir - $make_flowgram - $convert_to_FLX - $use_sfftools - --output_dir=$__new_file_path__ - - - - - - - - - - - - - - - - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/qiime_wrapper.py --- a/qiime/qiime_wrapper.py Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,292 +0,0 @@ -#!/usr/bin/env python -import logging, os, string, sys, tempfile, glob, shutil, types, urllib, optparse, re -import shlex, subprocess - -""" -sys.argv -this --galaxy_datasets= --quime_script - -alpha_rarefaction - output html - wf_arare/alpha_rarefaction_plots/rarefaction_plots.html - wf_arare/alpha_rarefaction_plots/html_plots/ - 
wf_arare/alpha_div - wf_arare/alpha_div/alpha_rarefaction_101_0.txt - - --galaxy_summary_html=$output_html - --galaxy_summary_template=$output_template - --galaxy_summary_links='label:link,label:link' - --galaxy_outputdir=$output_html.extra_files_path - - -""" - -def stop_err( msg ): - sys.stderr.write( "%s\n" % msg ) - sys.exit() - -def __main__(): - debug = False - tmp_dir = None - inputdir = None - outputdir = None - dataset_patterns = None - datasetid = None - new_dataset_patterns = None - new_files_path = None - summary_html=None - summary_template=None - summary_links=None - ## adds "log file" printing capabilities for primary output in dynamic file output - logfile = None - ## added support for correcting file extensions - newext = None - extchange = None - ## check if there are files to generate - cmd_args = [] - for arg in sys.argv[1:]: - if arg.startswith('--galaxy_'): - (opt,val) = arg.split('=') if arg.find('=') > 0 else (arg,None) - ''' - if opt == '--galaxy_tmpdir': - try: - if not os.path.exists(val): - os.makedirs(val) - tmp_dir = val - except Exception, ex: - stop_err(ex) - ''' - if opt == '--galaxy_outputdir': - try: - if not os.path.exists(val): - os.makedirs(val) - outputdir = val - except Exception, ex: - stop_err(ex) - if opt == '--galaxy_datasets': - dataset_patterns = val.split(',') - if opt == '--galaxy_datasetid': - datasetid = val - if opt == '--galaxy_new_datasets': - new_dataset_patterns = val.split(',') - if opt == '--galaxy_new_files_path': - if not os.path.exists(val): - os.makedirs(val) - new_files_path = val - if opt == '--galaxy_summary_html': - summary_html=val - if opt == '--galaxy_summary_template': - summary_template=val - if opt == '--galaxy_summary_links': - summary_links=val - if opt == '--galaxy_debug': - debug = True - if opt == '--galaxy_logfile': - logfile = val - if opt == '--galaxy_ext_change': - extchange = val - if opt == '--galaxy_new_ext': - newext = val - if opt == '--galaxy_inputdir': - inputdir = val - else: - 
cmd_args.append(arg) - if debug: print >> sys.stdout, '\n : '.join(cmd_args) - try: - stderr = '' - # allow for changing of file extension for files which require it - if extchange != None and inputdir != None and newext != None: - #newfile = os.path.join(inputdir,"temporary."+newext) - try: - os.link(extchange,inputdir+"/temporary."+newext) - except: - shutil.copy2(extchange,inputdir+"/temporary."+newext) - cmdline = ' '.join(cmd_args) - if debug: print >> sys.stdout, cmdline - ''' - if tmp_dir == None or not os.path.isdir(tmp_dir): - tmp_dir = tempfile.mkdtemp() - if outputdir == None or not os.path.isdir(outputdir): - outputdir = tmp_dir - ''' - tmp_stderr_name = tempfile.NamedTemporaryFile( dir=tmp_dir,suffix='.err' ).name - tmp_stderr = open( tmp_stderr_name, 'wb' ) - tmp_stdout_name = tempfile.NamedTemporaryFile( dir=tmp_dir,suffix='.out' ).name - tmp_stdout = open( tmp_stdout_name, 'wb' ) - if logfile != None: - logwrite = open(logfile, 'a') - proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno(), stdout=logwrite ) - else: - subprocess.call("macqiime"); - proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno(), stdout=tmp_stdout.fileno() ) - returncode = proc.wait() - tmp_stderr.close() - if logfile != None: - logwrite.close() - # get stderr, allowing for case where it's very large - tmp_stderr = open( tmp_stderr_name, 'rb' ) - buffsize = 1048576 - try: - while True: - stderr += tmp_stderr.read( buffsize ) - if not stderr or len( stderr ) % buffsize != 0: - break - if debug: print >> sys.stderr, stderr - except OverflowError: - pass - tmp_stderr.close() - if returncode != 0: - if debug: print >> sys.stderr, "returncode = %d" % returncode - raise Exception, stderr - #raise Exception, sys.stderr - # collect results - if dataset_patterns != None: - for root, dirs, files in os.walk(outputdir): - for fname in files: - fpath = os.path.join(root,fname) - if dataset_patterns != None: - for 
output in dataset_patterns: - (pattern,path) = output.split(':') - if debug: print >> sys.stdout, '%s -> %s' % (pattern,path) - if path == None or path == 'None': - continue - if debug: print >> sys.stdout, 'outdir %s match: %s' % (fname,re.match(pattern,fname)) - if re.match(pattern,fname): - found = True - # flist.remove(fname) - try: - shutil.copy2(fpath, path) - if new_files_path != None: - os.link(fpath, os.path.join(new_files_path,fname)) - except Exception, ex: - stop_err('%s' % ex) - # move result to outdir - # Need to flatten the dir hierachy in order for galaxy to serve the href links - if summary_html != None: - """ - for root, dirs, files in os.walk(outputdir): - if root != outputdir: - for fname in files: - fpath = os.path.join(root,fname) - """ - ## move everything up one level - dlist = os.listdir(outputdir) - for dname in dlist: - dpath = os.path.join(outputdir,dname) - if os.path.isdir(dpath): - flist = os.listdir(dpath) - for fname in flist: - fpath = os.path.join(dpath,fname) - shutil.move(fpath,outputdir) - if summary_template != None: - shutil.copy(summary_template,summary_html) - """ - flist = os.listdir(outputdir) - if debug: print >> sys.stdout, 'outputdir: %s' % outputdir - if debug: print >> sys.stdout, 'files: %s' % ','.join(flist) - if dataset_patterns != None: - for output in dataset_patterns: - (pattern,path) = output.split(':') - if debug: print >> sys.stdout, '%s -> %s' % (pattern,path) - if path == None or path == 'None': - continue - for fname in flist: - if debug: print >> sys.stdout, 'outdir %s match: %s' % (fname,re.match(pattern,fname)) - if re.match(pattern,fname): - found = True - flist.remove(fname) - fpath = os.path.join(outputdir,fname) - try: - shutil.copy2(fpath, path) - except Exception, ex: - stop_err('%s' % ex) - """ - # Handle the dynamically generated galaxy datasets - # http://bitbucket.org/galaxy/galaxy-central/wiki/ToolsMultipleOutput - # --new_datasets = specifies files to be found in the new_file_path - # The 
list items are separated by commas - # Each item conatins: a regex pattern for matching filenames and a galaxy datatype (separated by :) - # The regex match.groups()[0] is used as the id name of the dataset, and must result in unique name for each output - # The --galaxy_output flag is used for instances where data needs to be copied to the extra_files_path for later - # directory use - if new_dataset_patterns != None and new_files_path != None and datasetid != None: - for output in new_dataset_patterns: - if ':' in output: pattern,ext = output.split(':',1) - flist = os.listdir(new_files_path) - for fname in flist: - m = re.match(pattern,fname) - if m: - fpath = os.path.join(new_files_path,fname) - if len(m.groups()) > 0: - root = m.groups()[0] - else: - # remove the ext from the name if it exists, galaxy will add back later - # remove underscores since galaxy uses that as a field separator for dynamic datasets - root = re.sub('\.?'+ext+'$','',fname).replace('_','').replace('.','') - # filename pattern required by galaxy - fn = "%s_%s_%s_%s_%s" % ( 'primary', datasetid, root, 'visible', ext ) - if debug: print >> sys.stdout, '> %s' % fpath - if debug: print >> sys.stdout, '< %s' % os.path.join(new_files_path,fn) - try: - os.link(fpath, os.path.join(new_files_path,fn)) - # needed for files with variable output and a directory structure - if outputdir != None: - os.link(fpath, os.path.join(outputdir,fname)) - # clean out files from tmp directory, may be unnecessary - #os.remove(fpath) - except: - shutil.copy2(fpath, os.path.join(new_files_path,fn)) - # needed for files with variable output and a directory structure - if outputdir != None: - os.link(fpath, os.path.join(outputdir,fname)) - except Exception, e: - msg = str(e) + stderr - #msg = str(e) + str(sys.stderr) - #stop_err( 'Error running ' + msg) - finally: - # Only remove temporary directories and files from temporary directory - # Enclose in try block, so we don't report error on stale nfs handles - try: - if 
logfile != None: - if outputdir != None: - logwrite = open(logfile, 'a') - logwrite.write('Tool started. Files created by tool: \n') - flist = os.listdir(outputdir) - for fname in flist: - if 'DS_Store' not in fname and 'primary' not in fname: - logwrite.write(fname+'\n') - logwrite.write('Tool Finished.') - logwrite.close() - if new_files_path != None: - logwrite = open(logfile, 'a') - logwrite.write('Tool started. Files created by tool: \n') - flist = os.listdir(new_files_path) - for fname in flist: - if 'DS_Store' not in fname and 'primary' not in fname: - logwrite.write(fname+'\n') - logwrite.write('Tool Finished.') - logwrite.close() - if tmp_dir != None and os.path.exists(tmp_dir) and os.path.isfile(tmp_dir): - #shutil.rmtree(tmp_dir) - pass - if outputdir != None and 'files' not in outputdir: - flist = os.listdir(outputdir) - for fname in flist: - if 'DS_Store' not in fname and 'primary' not in fname: - os.remove(os.path.join(outputdir,fname)) - if inputdir != None and 'files' not in inputdir: - flist = os.listdir(inputdir) - for fname in flist: - if 'DS_Store' not in fname and 'primary' not in fname: - os.remove(os.path.join(inputdir,fname)) - if new_files_path != None and 'files' not in new_files_path: - flist = os.listdir(new_files_path) - for fname in flist: - if 'DS_Store' not in fname and 'primary' not in fname: - os.remove(os.path.join(new_files_path,fname)) - - except: - pass - -if __name__ == "__main__": __main__() - diff -r 2c9714f56480 -r d80000f5ad20 qiime/single_rarefaction.xml --- a/qiime/single_rarefaction.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,34 +0,0 @@ - - Perform rarefaction on an otu table - - single_rarefaction.py - - - qiime_wrapper.py - single_rarefaction.py - --input_path=$input_path - --output_path=$output_path - --depth=$depth - $suppress_lineages_included - $keep_empty_otus - - - - - - - - - - - - - For more information, see single_rarefaction_ in the Qiime documentation. - - .. 
_single_rarefaction: http://qiime.org/scripts/single_rarefaction.html - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/split_libraries.xml --- a/qiime/split_libraries.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,146 +0,0 @@ - - Split libraries according to barcodes specified in mapping file - - split_libraries.py - - - qiime_wrapper.py - --galaxy_datasets='^seqs\.fna$:'$sequences,'histograms\.txt:'$histograms,'split_library_log\.txt:'$log - --galaxy_outputdir='$log.extra_files_path' - split_libraries.py - --dir-prefix='$log.extra_files_path' - --map=$map - #set fnas = [] - #for i in $inputs: - #set fnas = $fnas + [$i.fasta.__str__] - #end for - --fasta=#echo ','.join($fnas) - #set quals = [] - #for i in $inputs: - #if $i.qual != None and $i.qual.__str__ != 'None': - #set quals = $quals + [$i.qual.__str__] - #end if - #end for - #if len($quals) > 0: - --qual=#echo ','.join($quals) - #end if - #if len($min_seq_length.__str__) > 0 and $min_seq_length > 0: - --min-seq-length=$min_seq_length - #end if - #if len($max_seq_length.__str__) > 0: - --max-seq-length=$max_seq_length - #end if - $trim_seq_length - #if len($min_qual_score.__str__) > 0: - --min-qual-score=$min_qual_score - #end if - $keep_primer - $keep_barcode - #if len($max_ambig.__str__) > 0: - --max-ambig=$max_ambig - #end if - #if len($max_homopolymer.__str__) > 0: - --max-homopolymer=$max_homopolymer - #end if - #if len($max_primer_mismatch.__str__) > 0: - --max-primer-mismatch=$max_primer_mismatch - #end if - --barcode-type=$barcode_type - #if $max_barcode_errors >= 0.: - --max-barcode-errors=$max_barcode_errors - #end if - #if len($start_numbering_at.__str__) > 0: - --start-numbering-at=$start_numbering_at - #end if - $retain_unassigned_reads - $disable_bc_correction - #if len($qual_score_window.__str__) > 0: - --qual_score_window=$qual_score_window - #end if - $disable_primers - --reverse_primers=$reverse_primers - #if $reverse_primer_mismatches != None and 
$reverse_primer_mismatches.__str__ != "" and $reverse_primers.__str__!='disable': - --reverse_primer_mismatches=$reverse_primer_mismatches - #end if - $record_qual_scores - $discard_bad_windows - #if $median_length_filtering != None and $median_length_filtering.__str__ != "": - --median_length_filtering=$median_length_filtering - #end if - #if $added_demultiplex_field != None and $added_demultiplex_field.__str__ != "": - --added_demultiplex_field=$added_demultiplex_field - #end if - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - For more information, see split_libraries_ in the Qiime documentation. - -Updated and validated 01/19/12 - - .. _split_libraries: http://qiime.org/scripts/split_libraries.html - - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/summarize_taxa.xml --- a/qiime/summarize_taxa.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,88 +0,0 @@ - - Summarize Taxa - - summarize_taxa.py - - - qiime_wrapper.py - #set $levelnums = str($level).split(",") - #set $filestr = "" - #if $len($level.__str__) > 1: - #for $i in $levelnums: - #set $filestr = $filestr + '^\\\\S+_L'+$i+'\\\\.txt$:txt,' - #end for - --galaxy_new_datasets=$filestr - --galaxy_datasetid=$output1.id - --galaxy_new_files_path='$__new_file_path__' - --galaxy_logfile=$output1.__str__ - #else: - --galaxy_datasets='^\S+_L'$level'\.txt$:'$output1 - --galaxy_outputdir='$output1.extra_files_path' - #end if - summarize_taxa.py - --otu_table_fp=$otu_table_fp - --level=$level - #if $mapping != None and $mapping.__str__ != 'None': - --mapping=$mapping - #end if - #if $len($level.__str__) > 1: - --output_dir=$__new_file_path__ - #else: - --output_dir='$output1.extra_files_path' - #end if - $absolute_abundance - #if $lower_percentage > 0.0 and $lower_percentage.__str__ != '': - --lower_percentage=$lower_percentage - #end if - #if $upper_percentage > 0.0 and $upper_percentage.__str__ != '': - --upper_percentage=$upper_percentage - #end if 
- $transposed_output - #if $delimiter.__str__ != ';': - --delimiter=$delimiter - #end if - - - - - - - - - - - - - - - - - - - - - - - - - .. class:: warningmark: Please reload your browser to see all results in your history. - -For more information, see summarize_taxa_ in the Qiime documentation. - -Updated and validated 01/19/12 - - .. _summarize_taxa: http://qiime.org/scripts/summarize_taxa.html - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/tree_compare.xml --- a/qiime/tree_compare.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ - - Compare jackknifed/bootstrapped trees - - tree_compare.py - - - qiime_wrapper.py - --galaxy_tmpdir='$__new_file_path__' - tree_compare.py - --master_tree=$master_tree - --support_dir=$support_dir - --output_dir=$__new_file_path__ - - - - - - - - - - - - - - diff -r 2c9714f56480 -r d80000f5ad20 qiime/upgma_cluster.xml --- a/qiime/upgma_cluster.xml Wed Jun 06 14:50:36 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,49 +0,0 @@ - - Build a UPGMA tree comparing samples - - upgma_cluster.py - - - qiime_wrapper.py - #if $run_type.input_type.__str__ == 'multi': - --galaxy_new_datasets='^\S+\.tre$:tre' - --galaxy_new_files_path='$__new_file_path__' - --galaxy_datasetid=$output_path.id - --galaxy_logfile=$logfile - #end if - upgma_cluster.py - #if $run_type.input_type.__str__ == 'multi': - --input_path=$input_path.extra_files_path - --output_path='$__new_file_path__' - #else: - --input_path=$input_path - --output_path=$output_path - #end if - - - - - - - - - - - - - - - - - - - - - - For more information, see upgma_cluster_ in the Qiime documentation. - -Updated and validated 01/27/12 - - .. 
_upgma_cluster: http://qiime.org/scripts/upgma_cluster.html - - diff -r 2c9714f56480 -r d80000f5ad20 qiime_wrapper.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qiime_wrapper.py Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,312 @@ +#!/usr/bin/env python +import logging, os, string, sys, tempfile, glob, shutil, types, urllib, optparse, re +import shlex, subprocess + +""" +sys.argv +this --galaxy_datasets= --quime_script + +alpha_rarefaction + output html + wf_arare/alpha_rarefaction_plots/rarefaction_plots.html + wf_arare/alpha_rarefaction_plots/html_plots/ + wf_arare/alpha_div + wf_arare/alpha_div/alpha_rarefaction_101_0.txt + + --galaxy_summary_html=$output_html + --galaxy_summary_template=$output_template + --galaxy_summary_links='label:link,label:link' + --galaxy_outputdir=$output_html.extra_files_path + + +""" + +def stop_err( msg ): + sys.stderr.write( "%s\n" % msg ) + sys.exit() + +def __main__(): + debug = False + tmp_dir = None + inputdir = None + outputdir = None + dataset_patterns = None + datasetid = None + new_dataset_patterns = None + new_files_path = None + summary_html=None + summary_template=None + summary_links=None + ## adds "log file" printing capabilities for primary output in dynamic file output + logfile = None + ## added support for correcting file extensions + newext = None + extchange = None + ## check if there are files to generate + cmd_args = [] + for arg in sys.argv[1:]: + if arg.startswith('--galaxy_'): + (opt,val) = arg.split('=') if arg.find('=') > 0 else (arg,None) + ''' + if opt == '--galaxy_tmpdir': + try: + if not os.path.exists(val): + os.makedirs(val) + tmp_dir = val + except Exception, ex: + stop_err(ex) + ''' + if opt == '--galaxy_outputdir': + try: + if not os.path.exists(val): + os.makedirs(val) + outputdir = val + except Exception, ex: + stop_err(ex) + if opt == '--galaxy_datasets': + dataset_patterns = val.split(',') + if opt == '--galaxy_datasetid': + datasetid = val + if opt == '--galaxy_new_datasets': + 
new_dataset_patterns = val.split(',') + if opt == '--galaxy_new_files_path': + if not os.path.exists(val): + os.makedirs(val) + new_files_path = val + if opt == '--galaxy_summary_html': + summary_html=val + if opt == '--galaxy_summary_template': + summary_template=val + if opt == '--galaxy_summary_links': + summary_links=val + if opt == '--galaxy_debug': + debug = True + if opt == '--galaxy_logfile': + logfile = val + if opt == '--galaxy_ext_change': + extchange = val + if opt == '--galaxy_new_ext': + newext = val + if opt == '--galaxy_inputdir': + inputdir = val + else: + cmd_args.append(arg) + if debug: print >> sys.stdout, '\n : '.join(cmd_args) + try: + stderr = '' + # allow for changing of file extension for files which require it + if extchange != None and inputdir != None and newext != None: + #newfile = os.path.join(inputdir,"temporary."+newext) + try: + os.link(extchange,inputdir+"/temporary."+newext) + except: + shutil.copy2(extchange,inputdir+"/temporary."+newext) + cmdline = ' '.join(cmd_args) + if debug: print >> sys.stdout, cmdline + ''' + if tmp_dir == None or not os.path.isdir(tmp_dir): + tmp_dir = tempfile.mkdtemp() + if outputdir == None or not os.path.isdir(outputdir): + outputdir = tmp_dir + ''' + tmp_stderr_name = tempfile.NamedTemporaryFile( dir=tmp_dir,suffix='.err' ).name + tmp_stderr = open( tmp_stderr_name, 'wb' ) + tmp_stdout_name = tempfile.NamedTemporaryFile( dir=tmp_dir,suffix='.out' ).name + tmp_stdout = open( tmp_stdout_name, 'wb' ) + proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno(), stdout=tmp_stdout.fileno() ) + returncode = proc.wait() + tmp_stderr.close() + # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp_stderr_name, 'rb' ) + buffsize = 1048576 + try: + while True: + stderr += tmp_stderr.read( buffsize ) + if not stderr or len( stderr ) % buffsize != 0: + break + if debug: print >> sys.stderr, stderr + except OverflowError: + pass + tmp_stderr.close() + if 
returncode != 0: + if debug: print >> sys.stderr, "returncode = %d" % returncode + raise Exception, stderr + #raise Exception, sys.stderr + # collect results + if dataset_patterns != None: + for root, dirs, files in os.walk(outputdir): + for fname in files: + fpath = os.path.join(root,fname) + if dataset_patterns != None: + for output in dataset_patterns: + (pattern,path) = output.split(':') + if debug: print >> sys.stdout, '%s -> %s' % (pattern,path) + if path == None or path == 'None': + continue + if debug: print >> sys.stdout, 'outdir %s match: %s' % (fname,re.match(pattern,fname)) + if re.match(pattern,fname): + found = True + # flist.remove(fname) + try: + shutil.copy2(fpath, path) + if new_files_path != None: + os.link(fpath, os.path.join(new_files_path,fname)) + except Exception, ex: + stop_err('%s' % ex) + # move result to outdir + # Need to flatten the dir hierachy in order for galaxy to serve the href links + if summary_html != None: + """ + for root, dirs, files in os.walk(outputdir): + if root != outputdir: + for fname in files: + fpath = os.path.join(root,fname) + """ + ## move everything up one level + dlist = os.listdir(outputdir) + for dname in dlist: + dpath = os.path.join(outputdir,dname) + if os.path.isdir(dpath): + flist = os.listdir(dpath) + for fname in flist: + fpath = os.path.join(dpath,fname) + shutil.move(fpath,outputdir) + if summary_template != None: + shutil.copy(summary_template,summary_html) + """ + flist = os.listdir(outputdir) + if debug: print >> sys.stdout, 'outputdir: %s' % outputdir + if debug: print >> sys.stdout, 'files: %s' % ','.join(flist) + if dataset_patterns != None: + for output in dataset_patterns: + (pattern,path) = output.split(':') + if debug: print >> sys.stdout, '%s -> %s' % (pattern,path) + if path == None or path == 'None': + continue + for fname in flist: + if debug: print >> sys.stdout, 'outdir %s match: %s' % (fname,re.match(pattern,fname)) + if re.match(pattern,fname): + found = True + flist.remove(fname) + 
fpath = os.path.join(outputdir,fname) + try: + shutil.copy2(fpath, path) + except Exception, ex: + stop_err('%s' % ex) + """ + # Handle the dynamically generated galaxy datasets + # http://bitbucket.org/galaxy/galaxy-central/wiki/ToolsMultipleOutput + # --new_datasets = specifies files to be found in the new_file_path + # The list items are separated by commas + # Each item conatins: a regex pattern for matching filenames and a galaxy datatype (separated by :) + # The regex match.groups()[0] is used as the id name of the dataset, and must result in unique name for each output + # The --galaxy_output flag is used for instances where data needs to be copied to the extra_files_path for later + # directory use + if new_dataset_patterns != None and new_files_path != None and datasetid != None: + for output in new_dataset_patterns: + if ':' in output: pattern,ext = output.split(':',1) + flist = os.listdir(new_files_path) + for fname in flist: + m = re.match(pattern,fname) + if m: + fpath = os.path.join(new_files_path,fname) + if len(m.groups()) > 0: + root = m.groups()[0] + else: + # remove the ext from the name if it exists, galaxy will add back later + # remove underscores since galaxy uses that as a field separator for dynamic datasets + root = re.sub('\.?'+ext+'$','',fname).replace('_','').replace('.','') + # filename pattern required by galaxy + fn = "%s_%s_%s_%s_%s" % ( 'primary', datasetid, root, 'visible', ext ) + if debug: print >> sys.stdout, '> %s' % fpath + if debug: print >> sys.stdout, '< %s' % os.path.join(new_files_path,fn) + try: + os.link(fpath, os.path.join(new_files_path,fn)) + # needed for files with variable output and a directory structure + if outputdir != None: + os.link(fpath, os.path.join(outputdir,fname)) + # clean out files from tmp directory, may be unnecessary + #os.remove(fpath) + except: + shutil.copy2(fpath, os.path.join(new_files_path,fn)) + # needed for files with variable output and a directory structure + if outputdir != None: + 
os.link(fpath, os.path.join(outputdir,fname)) + + print "bob" + logfile + ''' + if logfile != None: + print "bleep" + if outputdir != None: + print "beep" + logwrite = open(logfile, 'w+') + logwrite.write('Tool started. Files created by tool: \n') + flist = os.listdir(outputdir) + for fname in flist: + if 'DS_Store' not in fname: + logwrite.write(fname+'\n') + logwrite.write('Tool Finished.') + logwrite.close() + if new_files_path != None: + print "boop" + logwrite = open(logfile, 'w+') + if len(logfile.readline() > 0): + logwrite.write('Tool started. Files created by tool: \n') + flist = os.listdir(new_files_path) + for fname in flist: + if 'DS_Store' not in fname: + logwrite.write(fname+'\n') + logwrite.write('Tool Finished.') + logwrite.close() + ''' + except Exception, e: + msg = str(e) + stderr + #msg = str(e) + str(sys.stderr) + #stop_err( 'Error running ' + msg) + finally: + # Only remove temporary directories and files from temporary directory + # Enclose in try block, so we don't report error on stale nfs handles + try: + if logfile != None: + if outputdir != None: + logwrite = open(logfile, 'r+') + logwrite.write('Tool started. Files created by tool: \n') + flist = os.listdir(outputdir) + for fname in flist: + if 'DS_Store' not in fname and 'primary' not in fname: + logwrite.write(fname+'\n') + logwrite.write('Tool Finished.') + logwrite.close() + if new_files_path != None: + logwrite = open(logfile, 'r+') + logwrite.write('Tool started. 
Files created by tool: \n') + flist = os.listdir(new_files_path) + for fname in flist: + if 'DS_Store' not in fname and 'primary' not in fname: + logwrite.write(fname+'\n') + logwrite.write('Tool Finished.') + logwrite.close() + if tmp_dir != None and os.path.exists(tmp_dir) and os.path.isfile(tmp_dir): + #shutil.rmtree(tmp_dir) + pass + if outputdir != None and 'files' not in outputdir: + flist = os.listdir(outputdir) + for fname in flist: + if 'DS_Store' not in fname and 'primary' not in fname: + os.remove(os.path.join(outputdir,fname)) + if inputdir != None and 'files' not in inputdir: + flist = os.listdir(inputdir) + for fname in flist: + if 'DS_Store' not in fname and 'primary' not in fname: + os.remove(os.path.join(inputdir,fname)) + if new_files_path != None and 'files' not in new_files_path: + flist = os.listdir(new_files_path) + for fname in flist: + if 'DS_Store' not in fname and 'primary' not in fname: + os.remove(os.path.join(new_files_path,fname)) + + except: + pass + +if __name__ == "__main__": __main__() + diff -r 2c9714f56480 -r d80000f5ad20 quality_scores_plot.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/quality_scores_plot.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,30 @@ + + Generates histograms of sequence quality scores and number of nucleotides recorded at a particular index + + quality_scores_plot.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + quality_scores_plot.py + --qual_fp=$qual_fp + --output_dir=$__new_file_path__ + --score_min=$score_min + --verbose + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 shared_phylotypes.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shared_phylotypes.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,32 @@ + + Compute shared OTUs between all pairs of samples + + shared_phylotypes.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + shared_phylotypes.py + --otu_table_fp=$otu_table_fp + --output_fp=$output_fp + 
--reference_sample=$reference_sample + $force_overwrite + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 single_rarefaction.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/single_rarefaction.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,36 @@ + + Perform rarefaction on an otu table + + single_rarefaction.py + + + qiime_wrapper.py + single_rarefaction.py + --input_path=$input_path + --output_path=$output_path + --depth=$depth + $suppress_lineages_included + $keep_empty_otus + + + + + + + + + + + + + For more information, see single_rarefaction_ in the Qiime documentation. + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + + .. _single_rarefaction: http://qiime.org/scripts/single_rarefaction.html + + diff -r 2c9714f56480 -r d80000f5ad20 sort_denoiser_output.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sort_denoiser_output.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,26 @@ + + Sort denoiser output by cluster size. + + sort_denoiser_output.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + sort_denoiser_output.py + --input_fasta_fp=$input_fasta_fp + --output_file=$output_file + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 sort_otu_table.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sort_otu_table.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,35 @@ + + Script for sorting the sample IDs in an OTU table based on a specified value in a mapping file. 
+ + sort_otu_table.py + + + qiime_wrapper.py + sort_otu_table.py + --input_otu_table=$input_otu_table + --mapping_fp=$mapping_fp + --sort_field=$sort_field + --output_fp=$output_fp + + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 split_libraries.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/split_libraries.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,148 @@ + + Split libraries according to barcodes specified in mapping file + + split_libraries.py + + + qiime_wrapper.py + --galaxy_datasets='^seqs\.fna$:'$sequences,'histograms\.txt:'$histograms,'split_library_log\.txt:'$log + --galaxy_outputdir='$log.extra_files_path' + split_libraries.py + --dir-prefix='$log.extra_files_path' + --map=$map + #set fnas = [] + #for i in $inputs: + #set fnas = $fnas + [$i.fasta.__str__] + #end for + --fasta=#echo ','.join($fnas) + #set quals = [] + #for i in $inputs: + #if $i.qual != None and $i.qual.__str__ != 'None': + #set quals = $quals + [$i.qual.__str__] + #end if + #end for + #if len($quals) > 0: + --qual=#echo ','.join($quals) + #end if + #if len($min_seq_length.__str__) > 0 and $min_seq_length > 0: + --min-seq-length=$min_seq_length + #end if + #if len($max_seq_length.__str__) > 0: + --max-seq-length=$max_seq_length + #end if + $trim_seq_length + #if len($min_qual_score.__str__) > 0: + --min-qual-score=$min_qual_score + #end if + $keep_primer + $keep_barcode + #if len($max_ambig.__str__) > 0: + --max-ambig=$max_ambig + #end if + #if len($max_homopolymer.__str__) > 0: + --max-homopolymer=$max_homopolymer + #end if + #if len($max_primer_mismatch.__str__) > 0: + --max-primer-mismatch=$max_primer_mismatch + #end if + --barcode-type=$barcode_type + #if $max_barcode_errors >= 0.: + --max-barcode-errors=$max_barcode_errors + #end if + #if len($start_numbering_at.__str__) > 0: + --start-numbering-at=$start_numbering_at + #end if + $retain_unassigned_reads + $disable_bc_correction + #if len($qual_score_window.__str__) > 0: + 
--qual_score_window=$qual_score_window + #end if + $disable_primers + --reverse_primers=$reverse_primers + #if $reverse_primer_mismatches != None and $reverse_primer_mismatches.__str__ != "" and $reverse_primers.__str__!='disable': + --reverse_primer_mismatches=$reverse_primer_mismatches + #end if + $record_qual_scores + $discard_bad_windows + #if $median_length_filtering != None and $median_length_filtering.__str__ != "": + --median_length_filtering=$median_length_filtering + #end if + #if $added_demultiplex_field != None and $added_demultiplex_field.__str__ != "": + --added_demultiplex_field=$added_demultiplex_field + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For more information, see split_libraries_ in the Qiime documentation. + +Updated and validated 01/19/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + + .. _split_libraries: http://qiime.org/scripts/split_libraries.html + + + diff -r 2c9714f56480 -r d80000f5ad20 split_libraries_illumina.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/split_libraries_illumina.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,56 @@ + + Script for processing raw Illumina Genome Analyzer II data. 
+ + split_libraries_illumina.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + split_libraries_illumina.py + --mapping_fp=$mapping_fp + --five_prime_read_fp=$five_prime_read_fp + --three_prime_read_fp=$three_prime_read_fp + --output_dir=$__new_file_path__ + $store_unassigned + --quality_threshold=$quality_threshold + --max_bad_run_length=$max_bad_run_length + --min_per_read_length=$min_per_read_length + --sequence_max_n=$sequence_max_n + --start_seq_id=$start_seq_id + $rev_comp_barcode + $barcode_in_header + + + + + + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 start_parallel_jobs.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/start_parallel_jobs.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,28 @@ + + Starts multiple jobs in parallel on multicore or multiprocessor systems. + + start_parallel_jobs.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + start_parallel_jobs.py + $make_jobs + $submit_jobs + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 submit_to_mgrast.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/submit_to_mgrast.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,32 @@ + + This script submits a FASTA file to MG-RAST + + submit_to_mgrast.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + submit_to_mgrast.py + --input_fasta_fp=$input_fasta_fp + --web_key_auth=$web_key_auth + --project_id=$project_id + --output_dir=$__new_file_path__ + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 summarize_otu_by_cat.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/summarize_otu_by_cat.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,38 @@ + + Create a summarized OTU table for a specific metadata category + + summarize_otu_by_cat.py + + + qiime_wrapper.py + summarize_otu_by_cat.py + --mapping_fp=$mapping_fp + --otu_table_fp=$otu_table_fp + --mapping_category=$mapping_category + --output_fp=$output_fp + $normalize_flag + + + + + + + + + + + + + 
For more information, see summarize_otu_by_cat_ in the Qiime documentation. + +Updated and validated 01/18/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + + .. _summarize_otu_by_cat: http://qiime.org/scripts/summarize_otu_by_cat.html + + diff -r 2c9714f56480 -r d80000f5ad20 summarize_taxa.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/summarize_taxa.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,93 @@ + +Summarize Taxa + + summarize_taxa.py + + + qiime_wrapper.py + #set $levelnums = str($level).split(",") + #set $filestr = "" + #if $len($level.__str__) > 1: + #for $i in $levelnums: + #set $filestr = $filestr + '^\\\\S+_L'+$i+'\\\\.txt$:txt,' + #end for + --galaxy_new_datasets=$filestr + --galaxy_datasetid=$output1.id + --galaxy_new_files_path='$__new_file_path__' + --galaxy_logfile=$output1.__str__ + #else: + --galaxy_datasets='^\S+_L'$level'\.txt$:'$output1 + --galaxy_outputdir='$output1.extra_files_path' + #end if + summarize_taxa.py + --otu_table_fp=$otu_table_fp + --level=$level + #if $mapping != None and $mapping.__str__ != 'None': + --mapping=$mapping + #end if + #if $len($level.__str__) > 1: + --output_dir=$__new_file_path__ + #else: + --output_dir='$output1.extra_files_path' + #end if + $absolute_abundance + #if $lower_percentage > 0.0 and $lower_percentage.__str__ != '': + --lower_percentage=$lower_percentage + #end if + #if $upper_percentage > 0.0 and $upper_percentage.__str__ != '': + --upper_percentage=$upper_percentage + #end if + $transposed_output + #if $delimiter.__str__ != ';': + --delimiter=$delimiter + #end if + + + + + + + + + + + + + + + + + + + + + + + + + +.. class:: warningmark + +Please reload your browser to see all results in your history. + +For more information, see summarize_taxa_ in the Qiime documentation. 
+ +Updated and validated 01/19/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA + +Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN + + .. _summarize_taxa: http://qiime.org/scripts/summarize_taxa.html + + diff -r 2c9714f56480 -r d80000f5ad20 supervised_learning.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/supervised_learning.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,62 @@ + + Run supervised classification using OTUs as predictors and a mapping file category as class labels. + + supervised_learning.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + supervised_learning.py + --input_data=$input_data + --mapping_file=$mapping_file + --category=$category + --output_dir=$__new_file_path__ + --method=$method + $force + --param_file=$param_file + $show_params + --filter_type=$filter_type + --filter_min=$filter_min + --filter_max=$filter_max + --filter_step=$filter_step + --filter_reps=$filter_reps + $keepfiles + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 transform_coordinate_matrices.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/transform_coordinate_matrices.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,38 @@ + + Transform 2 coordinate matrices + + transform_coordinate_matrices.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + transform_coordinate_matrices.py + --input_fps=$input_fps + --output_dir=$__new_file_path__ + --random_trials=$random_trials + --num_dimensions=$num_dimensions + --sample_id_map_fp=$sample_id_map_fp + $store_trial_details + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 tree_compare.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tree_compare.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,29 @@ + + Compare jackknifed/bootstrapped trees + + tree_compare.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + 
tree_compare.py + --master_tree=$master_tree + --support_dir=$support_dir + --output_dir=$__new_file_path__ + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 trflp_file_to_otu_table.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trflp_file_to_otu_table.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,26 @@ + + Convert TRFLP text file to an OTU table + + trflp_file_to_otu_table.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + trflp_file_to_otu_table.py + --input_path=$input_path + --output_path=$output_path + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 trim_sff_primers.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trim_sff_primers.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,40 @@ + + Trim sff primers + + trim_sff_primers.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + trim_sff_primers.py + --libdir=$libdir + --input_map=$input_map + --sfffile_path=$sfffile_path + --sffinfo_path=$sffinfo_path + $use_sfftools + $debug + + + + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 truncate_fasta_qual_files.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/truncate_fasta_qual_files.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,32 @@ + + Generates filtered fasta and quality score files by truncating at the specified base position. 
+ + truncate_fasta_qual_files.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + truncate_fasta_qual_files.py + --fasta_fp=$fasta_fp + --qual_fp=$qual_fp + --base_pos=$base_pos + --output_dir=$__new_file_path__ + + + + + + + + + + + + + + + + diff -r 2c9714f56480 -r d80000f5ad20 upgma_cluster.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/upgma_cluster.xml Wed Jun 06 16:17:36 2012 -0400 @@ -0,0 +1,26 @@ + + Build a UPGMA tree comparing samples + + upgma_cluster.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + upgma_cluster.py + --input_path=$input_path + --output_path=$output_path + + + + + + + + + + + + + +