# HG changeset patch # User greg # Date 1533062216 14400 # Node ID 4dccc60b35252bce9a45a2f48b8279953c2ab4db Uploaded diff -r 000000000000 -r 4dccc60b3525 .shed.yml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.shed.yml Tue Jul 31 14:36:56 2018 -0400 @@ -0,0 +1,13 @@ +name: extract_ipm_date_interval +owner: greg +description: | + Contains a tool that extracts a date interval from the dataset collection produced by the insect_phenology_model tool. +homepage_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/entomology/extract_ipm_date_interval +long_description: | + Contains a tool that extracts a date interval from the (data) dataset collection produced by the insect_phenology_model + tool and zooms into the interval by producing plots with daily ticks on the x axis instead of weekly) tick marks. The + (data) dataset collection consisting of the restricted date interval is also produced. +remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/entomology/extract_ipm_date_interval +type: unrestricted +categories: +- Entomology diff -r 000000000000 -r 4dccc60b3525 extract_ipm_date_interval.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extract_ipm_date_interval.R Tue Jul 31 14:36:56 2018 -0400 @@ -0,0 +1,77 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("data.table")) +suppressPackageStartupMessages(library("optparse")) + +option_list <- list( + make_option(c("--input_dir"), action="store", dest="input_dir", help="Directory containing .csv outputs from insect_phenology_model"), + make_option(c("--start_date"), action="store", dest="start_date", default=NULL, help="Start date for custom date interval"), + make_option(c("--script_dir"), action="store", dest="script_dir", help="R script source directory") +) + +parser <- OptionParser(usage="%prog [options] file", option_list=option_list); +args <- parse_args(parser, positional_arguments=TRUE); +opt <- args$options; + +get_new_temperature_data_frame = function(input_file) { + temperature_data_frame = read.csv(file=input_file, header=T, strip.white=TRUE, stringsAsFactors=FALSE, sep=","); + colnames(temperature_data_frame) = c("LATITUDE", "LONGITUDE", "DATE", "DOY", "TMIN", "TMAX"); + return(temperature_data_frame); +} + +# Import the shared utility functions. +utils_path <- paste(opt$script_dir, "utils.R", sep="/"); +source(utils_path); + +# FIXME: currently custom date fields are free text, but +# Galaxy should soon include support for a date selector +# at which point this tool should be enhanced to use it. +# Validate start_date. + +# Calaculate the number of days in the date interval. +start_date = validate_date(opt$start_date); +# Validate end_date. +end_date = validate_date(opt$end_date); +if (start_date >= end_date) { + stop_err("The start date must be between 1 and 50 days before the end date when setting date intervals for plots."); +} +# Calculate the number of days in the date interval. +num_days = difftime(end_date, start_date, units=c("days")); +# Add 1 to the number of days to make the dates inclusive. For +# example, if the user enters a date range of 2018-01-01 to +# 2018-01-31, they likely expect the end date to be included. +num_days = num_days + 1; +if (num_days > 50) { + # We need to restrict date intervals since + # plots render tick marks for each day. + stop_err("Date intervals for plotting cannot exceed 50 days."); +} +# Display the total number of days in the Galaxy history item blurb. +cat("Number of days in date interval: ", num_days, "\n"); + +# Get the year from the start_date. +year = get_year_from_date(start_date); +# Get the DOY for start_date and end_date. +start_date_doy = as.integer(strftime(start_date, format="%j")); +end_date_doy = as.integer(strftime(end_date, format="%j")); +# Get the ticks date labels for plots. +ticks_and_labels = get_x_axis_ticks_and_labels(temperature_data_frame, prepend_end_doy_norm, append_start_doy_norm, date_interval=TRUE); +ticks = c(unlist(ticks_and_labels[1])); +date_labels = c(unlist(ticks_and_labels[2])); +# All latitude values are the same, so get the value +# for plots from the first row. +latitude = temperature_data_frame$LATITUDE[1]; + +input_files = list.files(path=input_dir, full.names=TRUE); +for(input_file in input_files) { + temperature_data_frame = get_new_temperature_data_frame(input_file); + start_date_row = which(temperature_data_frame$DATE==start_date); + end_date_row = which(temperature_data_frame$DATE==end_date); + # Extract the date interval. + temperature_data_frame = temperature_data_frame[start_date_row:end_date_row,]; + # Save the date interval data into an output file + # named the same as the input. + file_path = paste("output_data_dir", input_file, sep="/"); + write.csv(temperature_data_frame, file=file_path, row.names=F); + # TODO: Save the dat interval plots... +} diff -r 000000000000 -r 4dccc60b3525 extract_ipm_date_interval.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extract_ipm_date_interval.xml Tue Jul 31 14:36:56 2018 -0400 @@ -0,0 +1,106 @@ + + from insect phenology model data + + r-optparse + + ipm_log.txt; +if [[ $? -ne 0 ]]; then + cp ipm_log.txt '$error_file'; + exit 1; +fi]]> + + + + len(value.split('-')[0])==4 and int(value.split('-')[0]) and len(value.split('-')[1])==2 and int(value.split('-')[1]) and len(value.split('-')[2])==2 and int(value.split('-')[2]) + + + len(value.split('-')[0])==4 and int(value.split('-')[0]) and len(value.split('-')[1])==2 and int(value.split('-')[1]) and len(value.split('-')[2])==2 and int(value.split('-')[2]) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Provides an agent-based stochastic model expressing stage-specific phenology and population dynamics for an insect species across geographic regions. + +----- + +**Required options** + + * **Plot standard error** - add standard error lines to plot. + + + 10.3389/fphys.2016.00165 + 10.1175/JTECH-D-11-00103.1 + 10.7289/V5D21VHZ + + diff -r 000000000000 -r 4dccc60b3525 utils.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils.R Tue Jul 31 14:36:56 2018 -0400 @@ -0,0 +1,315 @@ +#!/usr/bin/env Rscript + +get_file_path = function(life_stage, base_name, life_stage_nymph=NULL, life_stage_adult=NULL) { + if (!is.null(life_stage_nymph)) { + lsi = get_life_stage_index(life_stage, life_stage_nymph=life_stage_nymph); + file_name = paste(lsi, tolower(life_stage_nymph), base_name, sep="_"); + } else if (!is.null(life_stage_adult)) { + lsi = get_life_stage_index(life_stage, life_stage_adult=life_stage_adult); + file_name = paste(lsi, tolower(life_stage_adult), base_name, sep="_"); + } else { + lsi = get_life_stage_index(life_stage); + file_name = paste(lsi, base_name, sep="_"); + } + file_path = paste("output_plots_dir", file_name, sep="/"); + return(file_path); +} + +get_life_stage_index = function(life_stage, life_stage_nymph=NULL, life_stage_adult=NULL) { + # Name collection elements so that they + # are displayed in logical order. + if (life_stage=="Egg") { + lsi = "01"; + } else if (life_stage=="Nymph") { + if (life_stage_nymph=="Young") { + lsi = "02"; + } else if (life_stage_nymph=="Old") { + lsi = "03"; + } else if (life_stage_nymph=="Total") { + lsi="04"; + } + } else if (life_stage=="Adult") { + if (life_stage_adult=="Pre-vittelogenic") { + lsi = "05"; + } else if (life_stage_adult=="Vittelogenic") { + lsi = "06"; + } else if (life_stage_adult=="Diapausing") { + lsi = "07"; + } else if (life_stage_adult=="Total") { + lsi = "08"; + } + } else if (life_stage=="Total") { + lsi = "09"; + } + return(lsi); +} + +get_mean_and_std_error = function(p_replications, f1_replications, f2_replications) { + # P mean. + p_m = apply(p_replications, 1, mean); + # P standard error. + p_se = apply(p_replications, 1, sd) / sqrt(opt$replications); + # F1 mean. + f1_m = apply(f1_replications, 1, mean); + # F1 standard error. + f1_se = apply(f1_replications, 1, sd) / sqrt(opt$replications); + # F2 mean. + f2_m = apply(f2_replications, 1, mean); + # F2 standard error. + f2_se = apply(f2_replications, 1, sd) / sqrt(opt$replications); + return(list(p_m, p_se, f1_m, f1_se, f2_m, f2_se)) +} + +get_tick_index = function(index, last_tick, ticks, tick_labels, tick_sep) { + # The R code tries hard not to draw overlapping tick labels, and so + # will omit labels where they would abut or overlap previously drawn + # labels. This can result in, for example, every other tick being + # labelled. We'll keep track of the last tick to make sure all of + # the month labels are displayed, and missing ticks are restricted + # to Sundays which have no labels anyway. + if (last_tick==0) { + return(length(ticks)+1); + } + last_saved_tick = ticks[[length(ticks)]]; + if (index-last_saved_tick 0) { + prepend_end_norm_row = which(temperature_data_frame$DOY==prepend_end_doy_norm); + } else { + prepend_end_norm_row = 0; + } + if (append_start_doy_norm > 0) { + append_start_norm_row = which(temperature_data_frame$DOY==append_start_doy_norm); + } else { + append_start_norm_row = 0; + } + num_rows = dim(temperature_data_frame)[1]; + tick_labels = list(); + ticks = list(); + current_month_label = NULL; + last_tick = 0; + if (date_interval) { + tick_sep = 0; + } else { + tick_sep = 3; + } + for (i in 1:num_rows) { + # Get the year and month from the date which + # has the format YYYY-MM-DD. + date = format(temperature_data_frame$DATE[i]); + # Get the month label. + items = strsplit(date, "-")[[1]]; + month = items[2]; + month_label = month.abb[as.integer(month)]; + day = as.integer(items[3]); + doy = as.integer(temperature_data_frame$DOY[i]); + # We're plotting the entire year, so ticks will + # occur on Sundays and the first of each month. + if (i == prepend_end_norm_row) { + # Add a tick for the end of the 30 year normnals data + # that was prepended to the year-to-date data. + label_str = "End prepended 30 year normals"; + tick_index = get_tick_index(i, last_tick, ticks, tick_labels, tick_sep) + ticks[tick_index] = i; + if (date_interval) { + # Append the day to label_str + tick_labels[tick_index] = paste(label_str, day, sep=" "); + } else { + tick_labels[tick_index] = label_str; + } + last_tick = i; + } else if (doy == append_start_doy_norm) { + # Add a tick for the start of the 30 year normnals data + # that was appended to the year-to-date data. + label_str = "Start appended 30 year normals"; + tick_index = get_tick_index(i, last_tick, ticks, tick_labels, tick_sep) + ticks[tick_index] = i; + if (!identical(current_month_label, month_label)) { + # Append the month to label_str. + label_str = paste(label_str, month_label, spe=" "); + current_month_label = month_label; + } + if (date_interval) { + # Append the day to label_str + label_str = paste(label_str, day, sep=" "); + } + tick_labels[tick_index] = label_str; + last_tick = i; + } else if (i==num_rows) { + # Add a tick for the last day of the year. + label_str = ""; + tick_index = get_tick_index(i, last_tick, ticks, tick_labels, tick_sep) + ticks[tick_index] = i; + if (!identical(current_month_label, month_label)) { + # Append the month to label_str. + label_str = month_label; + current_month_label = month_label; + } + if (date_interval) { + # Append the day to label_str + label_str = paste(label_str, day, sep=" "); + } + tick_labels[tick_index] = label_str; + } else { + if (!identical(current_month_label, month_label)) { + # Add a tick for the month. + tick_index = get_tick_index(i, last_tick, ticks, tick_labels, tick_sep) + ticks[tick_index] = i; + if (date_interval) { + # Append the day to the month. + tick_labels[tick_index] = paste(month_label, day, sep=" "); + } else { + tick_labels[tick_index] = month_label; + } + current_month_label = month_label; + last_tick = i; + } + tick_index = get_tick_index(i, last_tick, ticks, tick_labels, tick_sep) + if (!is.null(tick_index)) { + if (date_interval) { + # Add a tick for every day. The first tick is the + # month label, so add a tick only if i is not 1 + if (i>1 & day>1) { + tick_index = get_tick_index(i, last_tick, ticks, tick_labels, tick_sep) + ticks[tick_index] = i; + # Add the day as the label. + tick_labels[tick_index] = day; + last_tick = i; + } + } else { + # Get the day. + day = weekdays(as.Date(date)); + if (day=="Sunday") { + # Add a tick if we're on a Sunday. + ticks[tick_index] = i; + # Add a blank month label so it is not displayed. + tick_labels[tick_index] = ""; + last_tick = i; + } + } + } + } + } + return(list(ticks, tick_labels)); +} + +render_chart = function(ticks, date_labels, chart_type, plot_std_error, insect, location, latitude, start_date, end_date, days, maxval, + replications, life_stage, group, group_std_error, group2=NULL, group2_std_error=NULL, group3=NULL, group3_std_error=NULL, + life_stages_adult=NULL, life_stages_nymph=NULL) { + if (chart_type=="pop_size_by_life_stage") { + if (life_stage=="Total") { + title = paste(insect, ": Reps", replications, ":", life_stage, "Pop :", location, ": Lat", latitude, ":", start_date, "-", end_date, sep=" "); + legend_text = c("Egg", "Nymph", "Adult"); + columns = c(4, 2, 1); + plot(days, group, main=title, type="l", ylim=c(0, maxval), axes=FALSE, lwd=2, xlab="", ylab="", cex=3, cex.lab=3, cex.axis=3, cex.main=3); + legend("topleft", legend_text, lty=c(1, 1, 1), col=columns, cex=3); + lines(days, group2, lwd=2, lty=1, col=2); + lines(days, group3, lwd=2, lty=1, col=4); + axis(side=1, at=ticks, labels=date_labels, las=2, font.axis=3, xpd=TRUE, cex=3, cex.lab=3, cex.axis=3, cex.main=3); + axis(side=2, font.axis=3, xpd=TRUE, cex=3, cex.lab=3, cex.axis=3, cex.main=3); + if (plot_std_error=="yes") { + # Standard error for group. + lines(days, group+group_std_error, lty=2); + lines(days, group-group_std_error, lty=2); + # Standard error for group2. + lines(days, group2+group2_std_error, col=2, lty=2); + lines(days, group2-group2_std_error, col=2, lty=2); + # Standard error for group3. + lines(days, group3+group3_std_error, col=4, lty=2); + lines(days, group3-group3_std_error, col=4, lty=2); + } + } else { + if (life_stage=="Egg") { + title = paste(insect, ": Reps", replications, ":", life_stage, "Pop :", location, ": Lat", latitude, ":", start_date, "-", end_date, sep=" "); + legend_text = c(life_stage); + columns = c(4); + } else if (life_stage=="Nymph") { + stage = paste(life_stages_nymph, "Nymph Pop :", sep=" "); + title = paste(insect, ": Reps", replications, ":", stage, location, ": Lat", latitude, ":", start_date, "-", end_date, sep=" "); + legend_text = c(paste(life_stages_nymph, life_stage, sep=" ")); + columns = c(2); + } else if (life_stage=="Adult") { + stage = paste(life_stages_adult, "Adult Pop", sep=" "); + title = paste(insect, ": Reps", replications, ":", stage, location, ": Lat", latitude, ":", start_date, "-", end_date, sep=" "); + legend_text = c(paste(life_stages_adult, life_stage, sep=" ")); + columns = c(1); + } + plot(days, group, main=title, type="l", ylim=c(0, maxval), axes=FALSE, lwd=2, xlab="", ylab="", cex=3, cex.lab=3, cex.axis=3, cex.main=3); + legend("topleft", legend_text, lty=c(1), col="black", cex=3); + axis(side=1, at=ticks, labels=date_labels, las=2, font.axis=3, xpd=TRUE, cex=3, cex.lab=3, cex.axis=3, cex.main=3); + axis(side=2, font.axis=3, xpd=TRUE, cex=3, cex.lab=3, cex.axis=3, cex.main=3); + if (plot_std_error=="yes") { + # Standard error for group. + lines(days, group+group_std_error, lty=2); + lines(days, group-group_std_error, lty=2); + } + } + } else if (chart_type=="pop_size_by_generation") { + if (life_stage=="Total") { + title_str = ": Total Pop by Gen :"; + } else if (life_stage=="Egg") { + title_str = ": Egg Pop by Gen :"; + } else if (life_stage=="Nymph") { + title_str = paste(":", life_stages_nymph, "Nymph Pop by Gen", ":", sep=" "); + } else if (life_stage=="Adult") { + title_str = paste(":", life_stages_adult, "Adult Pop by Gen", ":", sep=" "); + } + title = paste(insect, ": Reps", replications, title_str, location, ": Lat", latitude, ":", start_date, "-", end_date, sep=" "); + legend_text = c("P", "F1", "F2"); + columns = c(1, 2, 4); + plot(days, group, main=title, type="l", ylim=c(0, maxval), axes=FALSE, lwd=2, xlab="", ylab="", cex=3, cex.lab=3, cex.axis=3, cex.main=3); + legend("topleft", legend_text, lty=c(1, 1, 1), col=columns, cex=3); + lines(days, group2, lwd=2, lty=1, col=2); + lines(days, group3, lwd=2, lty=1, col=4); + axis(side=1, at=ticks, labels=date_labels, las=2, font.axis=3, xpd=TRUE, cex=3, cex.lab=3, cex.axis=3, cex.main=3); + axis(side=2, font.axis=3, xpd=TRUE, cex=3, cex.lab=3, cex.axis=3, cex.main=3); + if (plot_std_error=="yes") { + # Standard error for group. + lines(days, group+group_std_error, lty=2); + lines(days, group-group_std_error, lty=2); + # Standard error for group2. + lines(days, group2+group2_std_error, col=2, lty=2); + lines(days, group2-group2_std_error, col=2, lty=2); + # Standard error for group3. + lines(days, group3+group3_std_error, col=4, lty=2); + lines(days, group3-group3_std_error, col=4, lty=2); + } + } +} + +stop_err = function(msg) { + cat(msg, file=stderr()); + quit(save="no", status=1); +} + +validate_date = function(date_str) { + valid_date = as.Date(date_str, format="%Y-%m-%d"); + if( class(valid_date)=="try-error" || is.na(valid_date)) { + msg = paste("Invalid date: ", date_str, ", valid date format is yyyy-mm-dd.", sep=""); + stop_err(msg); + } + return(valid_date); +}