Mercurial > repos > greg > extract_ipm_date_interval
changeset 10:c2eb16ef23c0 draft
Uploaded
author | greg |
---|---|
date | Tue, 07 Aug 2018 13:02:58 -0400 |
parents | fd41f452c9fe |
children | 2a438187d051 |
files | extract_ipm_date_interval.R extract_ipm_date_interval.xml utils.R |
diffstat | 3 files changed, 721 insertions(+), 81 deletions(-) [+] |
line wrap: on
line diff
--- a/extract_ipm_date_interval.R Wed Aug 01 09:18:55 2018 -0400 +++ b/extract_ipm_date_interval.R Tue Aug 07 13:02:58 2018 -0400 @@ -1,28 +1,238 @@ #!/usr/bin/env Rscript suppressPackageStartupMessages(library("data.table")) +suppressPackageStartupMessages(library("hash")) suppressPackageStartupMessages(library("optparse")) option_list <- list( - make_option(c("--input_dir"), action="store", dest="input_dir", help="Directory containing .csv outputs from insect_phenology_model"), - make_option(c("--end_date"), action="store", dest="end_date", help="End date for date interval"), - make_option(c("--start_date"), action="store", dest="start_date", help="Start date for date interval"), - make_option(c("--script_dir"), action="store", dest="script_dir", help="R script source directory") + make_option(c("--input_data_dir"), action="store", dest="input_data_dir", help="Directory containing .csv outputs from insect_phenology_model"), + make_option(c("--end_date"), action="store", dest="end_date", help="End date for date interval"), + make_option(c("--start_date"), action="store", dest="start_date", help="Start date for date interval"), + make_option(c("--script_dir"), action="store", dest="script_dir", help="R script source directory"), + make_option(c("--tool_parameters"), action="store", dest="tool_parameters", help="Users defined parameters for executing the insect_phenology_model inputs") ) parser <- OptionParser(usage="%prog [options] file", option_list=option_list); args <- parse_args(parser, positional_arguments=TRUE); opt <- args$options; -get_new_temperature_data_frame = function(input_file) { - temperature_data_frame = read.csv(file=input_file, header=T, strip.white=TRUE, stringsAsFactors=FALSE, sep=","); +get_new_temperature_data_frame = function(input_data_file) { + # Read a csv file to produce a data frame + # consisting of the data which was produced + # by the insect_phenology_model tool. 
+ temperature_data_frame = read.csv(file=input_data_file, header=T, strip.white=TRUE, stringsAsFactors=FALSE, sep=","); return(temperature_data_frame); } +parse_tool_parameters = function(tool_parameters) { + # Parse the tool parameters that were used to produce + # the input datasets found in input_data_dir. These + # datasets were produced by the insect_phenology_model + # tool. + raw_params = sub("^__SeP__", "", tool_parameters); + raw_param_items = strsplit(raw_params, "__SeP__")[[1]]; + keys = raw_param_items[c(T, F)]; + values = raw_param_items[c(F, T)]; + num_keys_and_vals = length(keys); + for (i in 1:num_keys_and_vals) { + values[i] = restore_text(values[[i]]); + } + for (i in 1:num_keys_and_vals) { + key = keys[i]; + if (endsWith(key, "cond")) { + value = values[i]; + # Galaxy passes some input job parameters as json-like strings + # for complex objects like conditionals, so we should see if + # we can re-implement this using r-jsonlite if possible. An + # exception is currently thrown when we do this: + # params_hash = fromJSON(opt$tool_parameters); + # Error: lexical error: invalid char in json text. + # __SeP__adult_mortality__SeP____ + # (right here) ------^ + # Here is an example complex object parameter value, in + # this case the parameter name is plot_nymph_life_stage_cond. + # {"life_stages_nymph": ["Total"], "__current_case__": 0, "plot_nymph_life_stage": "yes"} + # This code is somewhat brittle, so a better approach is + # warranted if possible. + if (key == "merge_ytd_temperature_data_cond") { + val = grep("yes", value); + if (length(val)>0) { + # Get the location. 
+ items = strsplit(value, "\"location\": ")[[1]]; + location_str = items[2]; + val = grep("\",", location_str); + if (length(val)>0) { + items = strsplit(location_str, "\",")[[1]]; + location = items[1]; + } else { + location = items[1]; + } + if (location == "\"") { + location = ""; + } + keys[i] = "location"; + values[i] = location; + } + } else if (key =="plot_nymph_life_stage_cond") { + val = grep("yes", value); + if (length(val)==0) { + keys[i] = "plot_nymph_life_stage"; + values[i] = "no"; + } else { + # Get the value for "life_stages_nymph". + items = strsplit(value, "\"life_stages_nymph\": ")[[1]]; + life_stages_nymph_str = items[2]; + if (grep("],", life_stages_nymph_str)[[1]] > 0) { + items = strsplit(life_stages_nymph_str, "],")[[1]]; + life_stages_nymph_str = items[1]; + #life_stages_nymph_str = sub("^\\[", "", life_stages_nymph_str); + num_curent_keys = length(keys); + keys[num_curent_keys+1] = "life_stages_nymph"; + values[num_curent_keys+1] = life_stages_nymph_str; + } + keys[i] = "plot_nymph_life_stage"; + values[i] = "yes"; + } + } else if (key =="plot_adult_life_stage_cond") { + val = grep("yes", value); + # The value of val is an empty integer vector if the pattern is not found. + if (length(val)==0) { + keys[i] = "plot_adult_life_stage"; + values[i] = "no"; + } else { + # Get the value for "life_stages_adult". + items = strsplit(value, "\"life_stages_adult\": ")[[1]]; + life_stages_adult_str = items[2]; + if (grep("],", life_stages_adult_str)[[1]] > 0) { + items = strsplit(life_stages_adult_str, "],")[[1]]; + life_stages_adult_str = items[1]; + #life_stages_adult_str = sub("^\\[", "", life_stages_adult_str); + num_curent_keys = length(keys); + keys[num_curent_keys+1] = "life_stages_adult"; + values[num_curent_keys+1] = life_stages_adult_str; + } + keys[i] = "plot_adult_life_stage"; + values[i] = "yes"; + } + } + } + } + # Strip all double quotes from values. 
+ for (i in 1:length(values)) { + value = values[i]; + value = gsub("\"", "", value); + values[i] = value; + } + return(hash(keys, values)); +} + +prepare_plot = function(life_stage, file_path, maxval, ticks, date_labels, chart_type, plot_std_error, insect, location, + latitude, start_date, end_date, total_days_vector, replications, group, group_std_error, group2, group2_std_error, + group3, group3_std_error, sub_life_stage=NULL) { + # Start PDF device driver. + dev.new(width=20, height=30); + pdf(file=file_path, width=20, height=30, bg="white"); + par(mar=c(5, 6, 4, 4), mfrow=c(3, 1)); + render_chart(ticks, date_labels, chart_type, plot_std_error, insect, location, latitude, start_date, end_date, + total_days_vector, maxval, replications, life_stage, group=group, group_std_error=group_std_error, group2=group2, + group2_std_error=group2_std_error, group3=group3, group3_std_error=group3_std_error, sub_life_stage=sub_life_stage); + # Turn off device driver to flush output. + dev.off(); +} + +restore_text = function(text) { + # Un-escape characters that are escaped by the + # Galaxy tool parameter handlers. + if (is.null(text) || length(text) == 0) { + return(text); + } + chars = list(">", "<", "'", '"', "[", "]", "{", "}", "@", "\n", "\r", "\t", "#"); + mapped_chars = list("__gt__", "__lt__", "__sq__", "__dq__", "__ob__", "__cb__", + "__oc__", "__cc__", "__at__", "__cn__", "__cr__", "__tc__", "__pd__"); + for (i in 1:length(mapped_chars)) { + char = chars[[i]]; + mapped_char = mapped_chars[[i]]; + text = gsub(mapped_char, char, text); + } + return(text); +} + # Import the shared utility functions. utils_path <- paste(opt$script_dir, "utils.R", sep="/"); source(utils_path); +params_hash = parse_tool_parameters(opt$tool_parameters); + +# Determine the data we need to generate for plotting. 
+if (params_hash$plot_generations_separately == "yes") { + plot_generations_separately = TRUE; +} else { + plot_generations_separately = FALSE; +} +if (params_hash$plot_std_error == "yes") { + plot_std_error = TRUE; +} else { + plot_std_error = FALSE; +} +process_eggs = FALSE; +process_nymphs = FALSE; +process_young_nymphs = FALSE; +process_old_nymphs = FALSE; +process_total_nymphs = FALSE; +process_adults = FALSE; +process_previttelogenic_adults = FALSE; +process_vittelogenic_adults = FALSE; +process_diapausing_adults = FALSE; +process_total_adults = FALSE; +if (params_hash$plot_egg_life_stage == "yes") { + process_eggs = TRUE; +} +if (params_hash$plot_nymph_life_stage == "yes") { + process_nymphs = TRUE; + # Get the selected life stages. + value = params_hash$life_stages_nymph; + val = grep("Young", value); + if (length(val)>0) { + process_young_nymphs = TRUE; + } + val = grep("Old", value); + if (length(val)>0) { + process_old_nymphs = TRUE; + } + val = grep("Total", value); + if (length(val)>0) { + process_total_nymphs = TRUE; + } +} +if (params_hash$plot_adult_life_stage == "yes") { + process_adults = TRUE; + # Get the selected life stages. + value = params_hash$life_stages_adult; + val = grep("Pre-vittelogenic", value); + if (length(val)>0) { + process_previttelogenic_adults = TRUE; + } + val = grep("Vittelogenic", value); + if (length(val)>0) { + process_vittelogenic_adults = TRUE; + } + val = grep("Diapausing", value); + if (length(val)>0) { + process_diapausing_adults = TRUE; + } + val = grep("Total", value); + if (length(val)>0) { + process_total_adults = TRUE; + } +} + +if (params_hash$plot_egg_life_stage == "yes" & params_hash$plot_nymph_life_stage == "yes" & params_hash$plot_adult_life_stage == "yes") { + process_total = TRUE; +} else { + process_total = FALSE; +} + + # FIXME: currently custom date fields are free text, but # Galaxy should soon include support for a date selector # at which point this tool should be enhanced to use it. 
@@ -51,23 +261,460 @@ # Display the total number of days in the Galaxy history item blurb. cat("Number of days in date interval: ", num_days, "\n"); -input_files = list.files(path=opt$input_dir, full.names=TRUE); -for(input_file in input_files) { - temperature_data_frame = get_new_temperature_data_frame(input_file); +# Create the csv data files consisting of the date interval. +input_data_files = list.files(path=opt$input_data_dir, full.names=TRUE); +for (input_data_file in input_data_files) { + file_name = basename(input_data_file); + temperature_data_frame = get_new_temperature_data_frame(input_data_file); start_date_row = which(temperature_data_frame$DATE==start_date); end_date_row = which(temperature_data_frame$DATE==end_date); # Extract the date interval. temperature_data_frame = temperature_data_frame[start_date_row:end_date_row,]; - # Get the ticks date labels for plots. + # Save the date interval data into an output file + # named the same as the input. + file_path = paste("output_data_dir", file_name, sep="/"); + write.csv(temperature_data_frame, file=file_path, row.names=F); +} + +# Extract the vectors needed for plots from the input data files +# produced by the insect_phenology_model tool. +total_days_vector = NULL; +ticks_and_labels = NULL; +latitude = NULL; +input_data_files = list.files(path="output_data_dir", full.names=TRUE); +for (input_data_file in input_data_files) { + file_name = basename(input_data_file); + temperature_data_frame = get_new_temperature_data_frame(input_data_file); + # Initialize the total_days_vector for later plotting. 
+ if (is.null(total_days_vector)) { + total_days_vector = c(1:dim(temperature_data_frame)[1]); + } + if (is.null(ticks_and_labels)) { + # Get the ticks date labels for later plotting ticks_and_labels = get_x_axis_ticks_and_labels(temperature_data_frame, date_interval=TRUE); ticks = c(unlist(ticks_and_labels[1])); date_labels = c(unlist(ticks_and_labels[2])); - # All latitude values are the same, so get the value - # for plots from the first row. - latitude = temperature_data_frame$LATITUDE[1]; - # Save the date interval data into an output file - # named the same as the input. - file_path = paste("output_data_dir", basename(input_file), sep="/"); - write.csv(temperature_data_frame, file=file_path, row.names=F); - # TODO: Save the date interval plots... + } + if (is.null(latitude)) { + # Get the latitude for later plotting. + latitude = temperature_data_frame$LATITUDE[1]; + } + + if (file_name == "04_combined_generations.csv") { + if (process_eggs) { + eggs = temperature_data_frame$EGG; + if (plot_std_error) { + eggs.std_error = temperature_data_frame$EGGSE; + } + } + if (process_young_nymphs) { + young_nymphs = temperature_data_frame$YOUNGNYMPH; + if (plot_std_error) { + young_nymphs.std_error = temperature_data_frame$YOUNGNYMPHSE; + } + } + if (process_old_nymphs) { + old_nymphs = temperature_data_frame$OLDNYMPH; + if (plot_std_error) { + old_nymphs.std_error = temperature_data_frame$OLDNYMPHSE; + } + } + if (process_total_nymphs) { + total_nymphs = temperature_data_frame$TOTALNYMPH; + if (plot_std_error) { + total_nymphs.std_error = temperature_data_frame$TOTALNYMPHSE; + } + } + if (process_previttelogenic_adults) { + previttelogenic_adults = temperature_data_frame$PRE.VITADULT; + if (plot_std_error) { + previttelogenic_adults.std_error = temperature_data_frame$PRE.VITADULTSE; + } + } + if (process_vittelogenic_adults) { + vittelogenic_adults = temperature_data_frame$VITADULT; + if (plot_std_error) { + vittelogenic_adults.std_error = 
temperature_data_frame$VITADULTSE; + } + } + if (process_diapausing_adults) { + diapausing_adults = temperature_data_frame$DIAPAUSINGADULT; + if (plot_std_error) { + diapausing_adults.std_error = temperature_data_frame$DIAPAUSINGADULTSE; + } + } + if (process_total_adults) { + total_adults = temperature_data_frame$TOTALADULT; + if (plot_std_error) { + total_adults.std_error = temperature_data_frame$TOTALADULTSE; + } + } + } else if (file_name == "01_generation_P.csv") { + if (process_eggs) { + P_eggs = temperature_data_frame$EGG.P; + if (plot_std_error) { + P_eggs.std_error = temperature_data_frame$EGG.P.SE; + } + } + if (process_young_nymphs) { + P_young_nymphs = temperature_data_frame$YOUNGNYMPH.P; + if (plot_std_error) { + P_young_nymphs.std_error = temperature_data_frame$YOUNGNYMPH.P.SE; + } + } + if (process_old_nymphs) { + P_old_nymphs = temperature_data_frame$OLDNYMPH.P; + if (plot_std_error) { + P_old_nymphs.std_error = temperature_data_frame$OLDNYMPH.P.SE; + } + } + if (process_total_nymphs) { + P_total_nymphs = temperature_data_frame$TOTALNYMPH.P; + if (plot_std_error) { + P_total_nymphs.std_error = temperature_data_frame$TOTALNYMPH.P.SE; + } + } + if (process_previttelogenic_adults) { + P_previttelogenic_adults = temperature_data_frame$PRE.VITADULT.P; + if (plot_std_error) { + P_previttelogenic_adults.std_error = temperature_data_frame$PRE.VITADULT.P.SE; + } + } + if (process_vittelogenic_adults) { + P_vittelogenic_adults = temperature_data_frame$VITADULT.P; + if (plot_std_error) { + P_vittelogenic_adults.std_error = temperature_data_frame$VITADULT.P.SE; + } + } + if (process_diapausing_adults) { + P_diapausing_adults = temperature_data_frame$DIAPAUSINGADULT.P; + if (plot_std_error) { + P_diapausing_adults.std_error = temperature_data_frame$DIAPAUSINGADULT.P.SE; + } + } + if (process_total_adults) { + P_total_adults = temperature_data_frame$TOTALADULT.P; + if (plot_std_error) { + P_total_adults.std_error = temperature_data_frame$TOTALADULT.P.SE; + } + } 
+ } else if (file_name == "02_generation_F1.csv") { + if (process_eggs) { + F1_eggs = temperature_data_frame$EGG.F1; + if (plot_std_error) { + F1_eggs.std_error = temperature_data_frame$EGG.F1.SE; + } + } + if (process_young_nymphs) { + F1_young_nymphs = temperature_data_frame$YOUNGNYMPH.F1; + if (plot_std_error) { + F1_young_nymphs.std_error = temperature_data_frame$YOUNGNYMPH.F1.SE; + } + } + if (process_old_nymphs) { + F1_old_nymphs = temperature_data_frame$OLDNYMPH.F1; + if (plot_std_error) { + F1_old_nymphs.std_error = temperature_data_frame$OLDNYMPH.F1.SE; + } + } + if (process_total_nymphs) { + F1_total_nymphs = temperature_data_frame$TOTALNYMPH.F1; + if (plot_std_error) { + F1_total_nymphs.std_error = temperature_data_frame$TOTALNYMPH.F1.SE; + } + } + if (process_previttelogenic_adults) { + F1_previttelogenic_adults = temperature_data_frame$PRE.VITADULT.F1; + if (plot_std_error) { + F1_previttelogenic_adults.std_error = temperature_data_frame$PRE.VITADULT.F1.SE; + } + } + if (process_vittelogenic_adults) { + F1_vittelogenic_adults = temperature_data_frame$VITADULT.F1; + if (plot_std_error) { + F1_vittelogenic_adults.std_error = temperature_data_frame$VITADULT.F1.SE; + } + } + if (process_diapausing_adults) { + F1_diapausing_adults = temperature_data_frame$DIAPAUSINGADULT.F1; + if (plot_std_error) { + F1_diapausing_adults.std_error = temperature_data_frame$DIAPAUSINGADULT.F1.SE; + } + } + if (process_total_adults) { + F1_total_adults = temperature_data_frame$TOTALADULT.F1; + if (plot_std_error) { + F1_total_adults.std_error = temperature_data_frame$TOTALADULT.F1.SE; + } + } + } else if (file_name == "03_generation_F2.csv") { + if (process_eggs) { + F2_eggs = temperature_data_frame$EGG.F2; + if (plot_std_error) { + F2_eggs.std_error = temperature_data_frame$EGG.F2.SE; + } + } + if (process_young_nymphs) { + F2_young_nymphs = temperature_data_frame$YOUNGNYMPH.F2; + if (plot_std_error) { + F2_young_nymphs.std_error = temperature_data_frame$YOUNGNYMPH.F2.SE; + } 
+ } + if (process_old_nymphs) { + F2_old_nymphs = temperature_data_frame$OLDNYMPH.F2; + if (plot_std_error) { + F2_old_nymphs.std_error = temperature_data_frame$OLDNYMPH.F2.SE; + } + } + if (process_total_nymphs) { + F2_total_nymphs = temperature_data_frame$TOTALNYMPH.F2; + if (plot_std_error) { + F2_total_nymphs.std_error = temperature_data_frame$TOTALNYMPH.F2.SE; + } + } + if (process_previttelogenic_adults) { + F2_previttelogenic_adults = temperature_data_frame$PRE.VITADULT.F2; + if (plot_std_error) { + F2_previttelogenic_adults.std_error = temperature_data_frame$PRE.VITADULT.F2.SE; + } + } + if (process_vittelogenic_adults) { + F2_vittelogenic_adults = temperature_data_frame$VITADULT.F2; + if (plot_std_error) { + F2_vittelogenic_adults.std_error = temperature_data_frame$VITADULT.F2.SE; + } + } + if (process_diapausing_adults) { + F2_diapausing_adults = temperature_data_frame$DIAPAUSINGADULT.F2; + if (plot_std_error) { + F2_diapausing_adults.std_error = temperature_data_frame$DIAPAUSINGADULT.F2.SE; + } + } + if (process_total_adults) { + F2_total_adults = temperature_data_frame$TOTALADULT.F2; + if (plot_std_error) { + F2_total_adults.std_error = temperature_data_frame$TOTALADULT.F2.SE; + } + } + } } + +# Create the pdf plot files based on the date interval. +if (plot_generations_separately) { + chart_type = "pop_size_by_generation"; + if (process_eggs) { + # Total population size by generation. 
+ life_stage = "Egg"; + file_path = get_file_path(life_stage, "egg_pop_by_generation.pdf") + maxval = max(P_eggs+F1_eggs+F2_eggs) + 100; + prepare_plot(life_stage, file_path, maxval, ticks, date_labels, chart_type, params_hash$plot_std_error, + params_hash$insect, params_hash$location, latitude, start_date, end_date, total_days_vector, + params_hash$replications, group=P_eggs, group_std_error=P_eggs.std_error, group2=F1_eggs, + group2_std_error=F1_eggs.std_error, group3=F2_eggs, group3_std_error=F2_eggs.std_error); + } + if (process_nymphs) { + life_stage = "Nymph"; + if (process_young_nymphs) { + # Young nymph population size by generation. + sub_life_stage = "Young"; + file_path = get_file_path(life_stage, "nymph_pop_by_generation.pdf", sub_life_stage=sub_life_stage) + maxval = max(P_young_nymphs+F1_young_nymphs+F2_young_nymphs) + 100; + prepare_plot(life_stage, file_path, maxval, ticks, date_labels, chart_type, params_hash$plot_std_error, + params_hash$insect, params_hash$location, latitude, start_date, end_date, total_days_vector, + params_hash$replications, group=P_young_nymphs, group_std_error=P_young_nymphs.std_error, + group2=F1_young_nymphs, group2_std_error=F1_young_nymphs.std_error, group3=F2_young_nymphs, + group3_std_error=F2_young_nymphs.std_error, sub_life_stage=sub_life_stage); + } + if (process_old_nymphs) { + # Old nymph population size by generation. 
+ sub_life_stage = "Old"; + file_path = get_file_path(life_stage, "nymph_pop_by_generation.pdf", sub_life_stage=sub_life_stage) + maxval = max(P_old_nymphs+F1_old_nymphs+F2_old_nymphs) + 100; + prepare_plot(life_stage, file_path, maxval, ticks, date_labels, chart_type, params_hash$plot_std_error, + params_hash$insect, params_hash$location, latitude, start_date, end_date, total_days_vector, + params_hash$replications, group=P_old_nymphs, group_std_error=P_old_nymphs.std_error, + group2=F1_old_nymphs, group2_std_error=F1_old_nymphs.std_error, group3=F2_old_nymphs, + group3_std_error=F2_old_nymphs.std_error, sub_life_stage=sub_life_stage); + } + if (process_total_nymphs) { + # Total nymph population size by generation. + sub_life_stage = "Total"; + file_path = get_file_path(life_stage, "nymph_pop_by_generation.pdf", sub_life_stage=sub_life_stage) + maxval = max(P_total_nymphs+F1_total_nymphs+F2_total_nymphs) + 100; + prepare_plot(life_stage, file_path, maxval, ticks, date_labels, chart_type, params_hash$plot_std_error, + params_hash$insect, params_hash$location, latitude, start_date, end_date, total_days_vector, + params_hash$replications, group=P_total_nymphs, group_std_error=P_total_nymphs.std_error, + group2=F1_total_nymphs, group2_std_error=F1_total_nymphs.std_error, group3=F2_total_nymphs, + group3_std_error=F2_total_nymphs.std_error, sub_life_stage=sub_life_stage); + } + } + if (process_adults) { + life_stage = "Adult"; + if (process_previttelogenic_adults) { + # Pre-vittelogenic adult population size by generation. 
+ sub_life_stage = "Pre-vittelogenic"; + file_path = get_file_path(life_stage, "adult_pop_by_generation.pdf", sub_life_stage=sub_life_stage) + maxval = max(P_previttelogenic_adults+F1_previttelogenic_adults+F2_previttelogenic_adults) + 100; + prepare_plot(life_stage, file_path, maxval, ticks, date_labels, chart_type, params_hash$plot_std_error, + params_hash$insect, params_hash$location, latitude, start_date, end_date, total_days_vector, + params_hash$replications, group=P_previttelogenic_adults, + group_std_error=P_previttelogenic_adults.std_error, group2=F1_previttelogenic_adults, + group2_std_error=F1_previttelogenic_adults.std_error, group3=F2_previttelogenic_adults, + group3_std_error=F2_previttelogenic_adults.std_error, sub_life_stage=sub_life_stage); + } + if (process_vittelogenic_adults) { + # Vittelogenic adult population size by generation. + sub_life_stage = "Vittelogenic"; + file_path = get_file_path(life_stage, "adult_pop_by_generation.pdf", sub_life_stage=sub_life_stage) + maxval = max(P_vittelogenic_adults+F1_vittelogenic_adults+F2_vittelogenic_adults) + 100; + prepare_plot(life_stage, file_path, maxval, ticks, date_labels, chart_type, params_hash$plot_std_error, + params_hash$insect, params_hash$location, latitude, start_date, end_date, total_days_vector, + params_hash$replications, group=P_vittelogenic_adults, + group_std_error=P_vittelogenic_adults.std_error, group2=F1_vittelogenic_adults, + group2_std_error=F1_vittelogenic_adults.std_error, group3=F2_vittelogenic_adults, + group3_std_error=F2_vittelogenic_adults.std_error, sub_life_stage=sub_life_stage); + } + if (process_diapausing_adults) { + # Diapausing adult population size by generation. 
+ sub_life_stage = "Diapausing"; + file_path = get_file_path(life_stage, "adult_pop_by_generation.pdf", sub_life_stage=sub_life_stage) + maxval = max(P_diapausing_adults+F1_diapausing_adults+F2_diapausing_adults) + 100; + prepare_plot(life_stage, file_path, maxval, ticks, date_labels, chart_type, params_hash$plot_std_error, + params_hash$insect, params_hash$location, latitude, start_date, end_date, total_days_vector, + params_hash$replications, group=P_diapausing_adults, group_std_error=P_diapausing_adults.std_error, + group2=F1_diapausing_adults, group2_std_error=F1_diapausing_adults.std_error, group3=F2_diapausing_adults, + group3_std_error=F2_diapausing_adults.std_error, sub_life_stage=sub_life_stage); + } + if (process_total_adults) { + # Total adult population size by generation. + sub_life_stage = "Total"; + file_path = get_file_path(life_stage, "adult_pop_by_generation.pdf", sub_life_stage=sub_life_stage) + maxval = max(P_total_adults+F1_total_adults+F2_total_adults) + 100; + prepare_plot(life_stage, file_path, maxval, ticks, date_labels, chart_type, params_hash$plot_std_error, + params_hash$insect, params_hash$location, latitude, start_date, end_date, total_days_vector, + params_hash$replications, group=P_total_adults, group_std_error=P_total_adults.std_error, + group2=F1_total_adults, group2_std_error=F1_total_adults.std_error, group3=F2_total_adults, + group3_std_error=F2_total_adults.std_error, sub_life_stage=sub_life_stage); + } + } + if (process_total) { + life_stage = "Total"; + # Total population size for egg, nymph and adult by generation. + file_path = get_file_path(life_stage, "total_pop_by_generation.pdf") + maxval = max(total_adults+eggs+total_nymphs) + 100; + # P == total_adults + # P.std_error == total_adults.std_error + # F1 == eggs + # F1.std_error == eggs.std_error + # F2 == ??? + # F2.std_error == ??? + # FIXME: testing demonstrates that P and F1 are properly assigned + # above, but F2 cannot be determined. 
F2 should undoubtedly be + # total_nymphs, but the data is not the same between the output + # from the insect_phenology_model tool and the date interval from + # this tool. We won't plot the total until we get time to figure + # this out. + #prepare_plot(life_stage, file_path, maxval, ticks, date_labels, chart_type, params_hash$plot_std_error, + # params_hash$insect, params_hash$location, latitude, start_date, end_date, total_days_vector, + # params_hash$replications, group=total_adults, group_std_error=total_adults.std_error, group2=eggs, + # group2_std_error=eggs.std_error, group3=total_nymphs, group3_std_error=total_nymphs.std_error); + } +} else { + chart_type = "pop_size_by_life_stage"; + if (process_eggs) { + # Egg population size. + life_stage = "Egg"; + file_path = get_file_path(life_stage, "egg_pop.pdf") + maxval = max(eggs+eggs.std_error) + 100; + prepare_plot(life_stage, file_path, maxval, ticks, date_labels, chart_type, params_hash$plot_std_error, + params_hash$insect, params_hash$location, latitude, start_date, end_date, total_days_vector, + params_hash$replications, group=eggs, group_std_error=eggs.std_error); + } + if (process_nymphs) { + life_stage = "Nymph"; + if (process_young_nymphs) { + # Young nymph population size. + sub_life_stage = "Young"; + file_path = get_file_path(life_stage, "nymph_pop.pdf", sub_life_stage=sub_life_stage) + maxval = max(young_nymphs+young_nymphs.std_error) + 100; + prepare_plot(life_stage, file_path, maxval, ticks, date_labels, chart_type, params_hash$plot_std_error, + params_hash$insect, params_hash$location, latitude, start_date, end_date, total_days_vector, + params_hash$replications, group=young_nymphs, group_std_error=young_nymphs.std_error, + sub_life_stage=sub_life_stage); + } + if (process_old_nymphs) { + # Old nymph population size. 
+ sub_life_stage = "Old"; + file_path = get_file_path(life_stage, "nymph_pop.pdf", sub_life_stage=sub_life_stage) + maxval = max(old_nymphs+old_nymphs.std_error) + 100; + prepare_plot(life_stage, file_path, maxval, ticks, date_labels, chart_type, params_hash$plot_std_error, + params_hash$insect, params_hash$location, latitude, start_date, end_date, total_days_vector, + params_hash$replications, group=old_nymphs, group_std_error=old_nymphs.std_error, + sub_life_stage=sub_life_stage); + } + if (process_total_nymphs) { + # Total nymph population size. + sub_life_stage = "Total"; + file_path = get_file_path(life_stage, "nymph_pop.pdf", sub_life_stage=sub_life_stage) + maxval = max(total_nymphs+total_nymphs.std_error) + 100; + prepare_plot(life_stage, file_path, maxval, ticks, date_labels, chart_type, params_hash$plot_std_error, + params_hash$insect, params_hash$location, latitude, start_date, end_date, total_days_vector, + params_hash$replications, group=total_nymphs, group_std_error=total_nymphs.std_error, + sub_life_stage=sub_life_stage); + } + } + if (process_adults) { + life_stage = "Adult"; + if (process_previttelogenic_adults) { + # Pre-vittelogenic adult population size. + sub_life_stage = "Pre-vittelogenic"; + file_path = get_file_path(life_stage, "adult_pop.pdf", sub_life_stage=sub_life_stage) + maxval = max(previttelogenic_adults+previttelogenic_adults.std_error) + 100; + prepare_plot(life_stage, file_path, maxval, ticks, date_labels, chart_type, params_hash$plot_std_error, + params_hash$insect, params_hash$location, latitude, start_date, end_date, total_days_vector, + params_hash$replications, group=previttelogenic_adults, + group_std_error=previttelogenic_adults.std_error, sub_life_stage=sub_life_stage); + } + if (process_vittelogenic_adults) { + # Vittelogenic adult population size. 
+ sub_life_stage = "Vittelogenic"; + file_path = get_file_path(life_stage, "adult_pop.pdf", sub_life_stage=sub_life_stage) + maxval = max(vittelogenic_adults+vittelogenic_adults.std_error) + 100; + prepare_plot(life_stage, file_path, maxval, ticks, date_labels, chart_type, params_hash$plot_std_error, + params_hash$insect, params_hash$location, latitude, start_date, end_date, total_days_vector, + params_hash$replications, group=vittelogenic_adults, + group_std_error=vittelogenic_adults.std_error, sub_life_stage=sub_life_stage); + } + if (process_diapausing_adults) { + # Diapausing adult population size. + sub_life_stage = "Diapausing"; + file_path = get_file_path(life_stage, "adult_pop.pdf", sub_life_stage=sub_life_stage) + maxval = max(diapausing_adults+diapausing_adults.std_error) + 100; + prepare_plot(life_stage, file_path, maxval, ticks, date_labels, chart_type, params_hash$plot_std_error, + params_hash$insect, params_hash$location, latitude, start_date, end_date, total_days_vector, + params_hash$replications, group=diapausing_adults, group_std_error=diapausing_adults.std_error, + sub_life_stage=sub_life_stage); + } + if (process_total_adults) { + # Total adult population size. + sub_life_stage = "Total"; + file_path = get_file_path(life_stage, "adult_pop.pdf", sub_life_stage=sub_life_stage) + maxval = max(total_adults+total_adults.std_error) + 100; + prepare_plot(life_stage, file_path, maxval, ticks, date_labels, chart_type, params_hash$plot_std_error, + params_hash$insect, params_hash$location, latitude, start_date, end_date, total_days_vector, + params_hash$replications, group=total_adults, group_std_error=total_adults.std_error, + sub_life_stage=sub_life_stage); + } + } + if (process_total) { + # Total population size. 
+ life_stage = "Total"; + file_path = get_file_path(life_stage, "total_pop.pdf") + maxval = max(eggs+eggs.std_error, total_nymphs+total_nymphs.std_error, total_adults+total_adults.std_error) + 100; + prepare_plot(life_stage, file_path, maxval, ticks, date_labels, chart_type, params_hash$plot_std_error, + params_hash$insect, params_hash$location, latitude, start_date, end_date, total_days_vector, + params_hash$replications, group=total_adults, group_std_error=total_adults.std_error, + group2=total_nymphs, group2_std_error=total_nymphs.std_error, group3=eggs, group3_std_error=eggs.std_error); + } +} +
--- a/extract_ipm_date_interval.xml Wed Aug 01 09:18:55 2018 -0400 +++ b/extract_ipm_date_interval.xml Tue Aug 07 13:02:58 2018 -0400 @@ -1,89 +1,85 @@ <tool id="ipm_date_interval" name="Extract date interval" version="1.1.0"> <description>from insect phenology model data</description> <requirements> + <requirement type="package" version="1.10.4">r-data.table</requirement> + <requirement type="package" version="2.2.6">r-hash</requirement> <requirement type="package" version="1.4.4">r-optparse</requirement> - <requirement type="package" version="1.10.4">r-data.table</requirement> </requirements> <command detect_errors="exit_code"><![CDATA[ #import os -#set input_dir = 'input_dir' +#set input_data_dir = 'input_data_dir' #set output_data_dir = "output_data_dir" #set output_plots_dir = "output_plots_dir" #set error_file = $os.path.join($output_data_dir, "04_combined_generations.csv") -mkdir $input_dir && +#set job = $input_data[0].creating_job +#set tool_parameters = "" +#for $p in $job.parameters: + #set tool_parameters = $tool_parameters + "__SeP__" + $p.name + #set tool_parameters = $tool_parameters + "__SeP__" + $p.value +#end for +mkdir $input_data_dir && mkdir output_data_dir && mkdir output_plots_dir && -#for $i in $input: +#for $i in $input_data: #set filename = $i.file_name #set name = $i.name - ln -s $filename $input_dir/$name && + ln -s $filename $input_data_dir/$name && #end for Rscript '$__tool_directory__/extract_ipm_date_interval.R' --end_date '$end_date' ---input_dir '$input_dir' ---plot_std_error $plot_std_error +--input_data_dir '$input_data_dir' --script_dir '$__tool_directory__' --start_date '$start_date' -&>ipm_log.txt; +--tool_parameters '$tool_parameters' +&>eipmdi_log.txt; if [[ $? 
-ne 0 ]]; then - cp ipm_log.txt '$error_file'; + cp eipmdi_log.txt '$error_file'; exit 1; fi]]></command> <inputs> - <param name="input" type="data_collection" format="csv" collection_type="list" label="Insect phenology model data files" /> + <param name="input_data" type="data_collection" format="csv" collection_type="list" label="Insect phenology model data files" /> <param name="start_date" type="text" value="" label="Start date" help="Format must be yyyy-mm-dd"> <validator type="expression" message="Date must have the format yyyy-mm-dd">len(value.split('-')[0])==4 and int(value.split('-')[0]) and len(value.split('-')[1])==2 and int(value.split('-')[1]) and len(value.split('-')[2])==2 and int(value.split('-')[2])</validator> </param> <param name="end_date" type="text" value="" label="End date" help="Format must be yyyy-mm-dd"> <validator type="expression" message="Date must have the format yyyy-mm-dd">len(value.split('-')[0])==4 and int(value.split('-')[0]) and len(value.split('-')[1])==2 and int(value.split('-')[1]) and len(value.split('-')[2])==2 and int(value.split('-')[2])</validator> </param> - <param name="plot_std_error" type="select" label="Plot standard error?"> - <option value="yes" selected="True">Yes</option> - <option value="no">No</option> - </param> </inputs> <outputs> <collection name="output_data_collection" type="list" label="${tool.name} (data), on ${on_string}"> <discover_datasets pattern="__name__" directory="output_data_dir" format="csv"/> </collection> - <collection name="output_plots_collection" type="list" label="${tool.name} (plots), on ${on_string}"> + <collection name="output_plot_collection" type="list" label="${tool.name} (plots), on ${on_string}"> <discover_datasets pattern="__name__" directory="output_plots_dir" format="pdf"/> </collection> </outputs> <tests> <test> - <param name="input"> + <!-- + FIXME: Here is a sort of layout for a test, but since the input collection will be uploaded, the test + will fail because the job 
parameters for the inputs will not be those produced by the insect_phenology_model + tool. Testing this tool will require using the workflow testing approach. + <param name="input_data"> <collection type="list"> - <element name="04_combined_generations.csv"/> + <element name="input_p_1.csv" value="input_p_1.csv" ftype="csv"/> + <element name="input_f1_1.csv" value="input_f1_1.csv" ftype="csv"/> + <element name="input_f2_1.csv" value="input_f2_1.csv" ftype="csv"/> + <element name="input_combined1.csv" value="input_combined1.csv" ftype="csv"/> </collection> </param> <param name="start_date" value="2017-04-01"/> <param name="end_date" value="2017-04-15"/> <output_collection name="output_data_collection" type="list"> - <element name="04_combined_generations.csv" file="output_combined6.csv" ftype="csv" compare="contains"/> - </output_collection> - <!-- - <output_collection name="output_plots_collection" type="list"> - <element name="02_young_nymph_pop.pdf" file="plot.pdf" ftype="pdf" compare="contains"/> - <element name="05_pre-vittelogenic_adult_pop.pdf" file="plot.pdf" ftype="pdf" compare="contains"/> + <element name="01_generation_P.csv" file="output_p_1.csv" ftype="csv" compare="contains"/> + <element name="02_generation_F1.csv" file="output_f1_1.csv" ftype="csv" compare="contains"/> + <element name="03_generation_F2.csv" file="output_f2_1.csv" ftype="csv" compare="contains"/> + <element name="04_combined_generations.csv" file="output_combined1.csv" ftype="csv" compare="contains"/> </output_collection> - --> - </test> - <test> - <param name="input"> - <collection type="list"> - <element name="04_combined_generations.csv"/> - </collection> - </param> - <param name="start_date" value="2017-01-01"/> - <param name="end_date" value="2017-01-15"/> - <output_collection name="output_data_collection" type="list"> - <element name="04_combined_generations.csv" file="output_combined7.csv" ftype="csv" compare="contains"/> - </output_collection> - <!-- - <output_collection 
name="output_plots_collection" type="list"> - <element name="02_young_nymph_pop.pdf" file="plot.pdf" ftype="pdf" compare="contains"/> - <element name="05_pre-vittelogenic_adult_pop.pdf" file="plot.pdf" ftype="pdf" compare="contains"/> + <output_collection name="output_plot_collection" type="list"> + <element name="01_egg_pop_by_generation.pdf" file="plot.pdf" ftype="pdf" compare="contains"/> + <element name="04_total_nymph_pop_by_generation.pdf" file="plot.pdf" ftype="pdf" compare="contains"/> + <element name="08_total_adult_pop_by_generation.pdf" file="plot.pdf" ftype="pdf" compare="contains"/> + <element name="09_total_pop_by_generation.pdf" file="plot.pdf" ftype="pdf" compare="contains"/> </output_collection> --> </test>
--- a/utils.R Wed Aug 01 09:18:55 2018 -0400 +++ b/utils.R Tue Aug 07 13:02:58 2018 -0400 @@ -1,15 +1,12 @@ #!/usr/bin/env Rscript -get_file_path = function(life_stage, base_name, life_stage_nymph=NULL, life_stage_adult=NULL) { - if (!is.null(life_stage_nymph)) { - lsi = get_life_stage_index(life_stage, life_stage_nymph=life_stage_nymph); - file_name = paste(lsi, tolower(life_stage_nymph), base_name, sep="_"); - } else if (!is.null(life_stage_adult)) { - lsi = get_life_stage_index(life_stage, life_stage_adult=life_stage_adult); - file_name = paste(lsi, tolower(life_stage_adult), base_name, sep="_"); - } else { +get_file_path = function(life_stage, base_name, sub_life_stage=NULL) { + if (is.null(sub_life_stage)) { lsi = get_life_stage_index(life_stage); file_name = paste(lsi, base_name, sep="_"); + } else { + lsi = get_life_stage_index(life_stage, sub_life_stage=sub_life_stage); + file_name = paste(lsi, tolower(sub_life_stage), base_name, sep="_"); } file_path = paste("output_plots_dir", file_name, sep="/"); return(file_path); @@ -20,27 +17,27 @@ return (date_str_items[1]); } -get_life_stage_index = function(life_stage, life_stage_nymph=NULL, life_stage_adult=NULL) { +get_life_stage_index = function(life_stage, sub_life_stage=NULL) { # Name collection elements so that they # are displayed in logical order. 
if (life_stage=="Egg") { lsi = "01"; } else if (life_stage=="Nymph") { - if (life_stage_nymph=="Young") { + if (sub_life_stage=="Young") { lsi = "02"; - } else if (life_stage_nymph=="Old") { + } else if (sub_life_stage=="Old") { lsi = "03"; - } else if (life_stage_nymph=="Total") { + } else if (sub_life_stage=="Total") { lsi="04"; } } else if (life_stage=="Adult") { - if (life_stage_adult=="Pre-vittelogenic") { + if (sub_life_stage=="Pre-vittelogenic") { lsi = "05"; - } else if (life_stage_adult=="Vittelogenic") { + } else if (sub_life_stage=="Vittelogenic") { lsi = "06"; - } else if (life_stage_adult=="Diapausing") { + } else if (sub_life_stage=="Diapausing") { lsi = "07"; - } else if (life_stage_adult=="Total") { + } else if (sub_life_stage=="Total") { lsi = "08"; } } else if (life_stage=="Total") { @@ -223,7 +220,7 @@ render_chart = function(ticks, date_labels, chart_type, plot_std_error, insect, location, latitude, start_date, end_date, days, maxval, replications, life_stage, group, group_std_error, group2=NULL, group2_std_error=NULL, group3=NULL, group3_std_error=NULL, - life_stages_adult=NULL, life_stages_nymph=NULL) { + sub_life_stage=NULL) { if (chart_type=="pop_size_by_life_stage") { if (life_stage=="Total") { title = paste(insect, ": Reps", replications, ":", life_stage, "Pop :", location, ": Lat", latitude, ":", start_date, "-", end_date, sep=" "); @@ -252,14 +249,14 @@ legend_text = c(life_stage); columns = c(4); } else if (life_stage=="Nymph") { - stage = paste(life_stages_nymph, "Nymph Pop :", sep=" "); + stage = paste(sub_life_stage, "Nymph Pop :", sep=" "); title = paste(insect, ": Reps", replications, ":", stage, location, ": Lat", latitude, ":", start_date, "-", end_date, sep=" "); - legend_text = c(paste(life_stages_nymph, life_stage, sep=" ")); + legend_text = c(paste(sub_life_stage, life_stage, sep=" ")); columns = c(2); } else if (life_stage=="Adult") { - stage = paste(life_stages_adult, "Adult Pop", sep=" "); + stage = paste(sub_life_stage, 
"Adult Pop", sep=" "); title = paste(insect, ": Reps", replications, ":", stage, location, ": Lat", latitude, ":", start_date, "-", end_date, sep=" "); - legend_text = c(paste(life_stages_adult, life_stage, sep=" ")); + legend_text = c(paste(sub_life_stage, life_stage, sep=" ")); columns = c(1); } plot(days, group, main=title, type="l", ylim=c(0, maxval), axes=FALSE, lwd=2, xlab="", ylab="", cex=3, cex.lab=3, cex.axis=3, cex.main=3); @@ -278,9 +275,9 @@ } else if (life_stage=="Egg") { title_str = ": Egg Pop by Gen :"; } else if (life_stage=="Nymph") { - title_str = paste(":", life_stages_nymph, "Nymph Pop by Gen", ":", sep=" "); + title_str = paste(":", sub_life_stage, "Nymph Pop by Gen", ":", sep=" "); } else if (life_stage=="Adult") { - title_str = paste(":", life_stages_adult, "Adult Pop by Gen", ":", sep=" "); + title_str = paste(":", sub_life_stage, "Adult Pop by Gen", ":", sep=" "); } title = paste(insect, ": Reps", replications, title_str, location, ": Lat", latitude, ":", start_date, "-", end_date, sep=" "); legend_text = c("P", "F1", "F2");