# HG changeset patch
# User greg
# Date 1533062216 14400
# Node ID 4dccc60b35252bce9a45a2f48b8279953c2ab4db
Uploaded
diff -r 000000000000 -r 4dccc60b3525 .shed.yml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml Tue Jul 31 14:36:56 2018 -0400
@@ -0,0 +1,13 @@
+name: extract_ipm_date_interval
+owner: greg
+description: |
+ Contains a tool that extracts a date interval from the dataset collection produced by the insect_phenology_model tool.
+homepage_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/entomology/extract_ipm_date_interval
+long_description: |
+ Contains a tool that extracts a date interval from the (data) dataset collection produced by the insect_phenology_model
+ tool and zooms into the interval by producing plots with daily ticks on the x axis instead of weekly) tick marks. The
+ (data) dataset collection consisting of the restricted date interval is also produced.
+remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/entomology/extract_ipm_date_interval
+type: unrestricted
+categories:
+- Entomology
diff -r 000000000000 -r 4dccc60b3525 extract_ipm_date_interval.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_ipm_date_interval.R Tue Jul 31 14:36:56 2018 -0400
@@ -0,0 +1,77 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("data.table"))
+suppressPackageStartupMessages(library("optparse"))
+
+option_list <- list(
+ make_option(c("--input_dir"), action="store", dest="input_dir", help="Directory containing .csv outputs from insect_phenology_model"),
+ make_option(c("--start_date"), action="store", dest="start_date", default=NULL, help="Start date for custom date interval"),
+ make_option(c("--script_dir"), action="store", dest="script_dir", help="R script source directory")
+)
+
+parser <- OptionParser(usage="%prog [options] file", option_list=option_list);
+args <- parse_args(parser, positional_arguments=TRUE);
+opt <- args$options;
+
+get_new_temperature_data_frame = function(input_file) {
+ temperature_data_frame = read.csv(file=input_file, header=T, strip.white=TRUE, stringsAsFactors=FALSE, sep=",");
+ colnames(temperature_data_frame) = c("LATITUDE", "LONGITUDE", "DATE", "DOY", "TMIN", "TMAX");
+ return(temperature_data_frame);
+}
+
+# Import the shared utility functions.
+utils_path <- paste(opt$script_dir, "utils.R", sep="/");
+source(utils_path);
+
+# FIXME: currently custom date fields are free text, but
+# Galaxy should soon include support for a date selector
+# at which point this tool should be enhanced to use it.
+# Validate start_date.
+
+# Calaculate the number of days in the date interval.
+start_date = validate_date(opt$start_date);
+# Validate end_date.
+end_date = validate_date(opt$end_date);
+if (start_date >= end_date) {
+ stop_err("The start date must be between 1 and 50 days before the end date when setting date intervals for plots.");
+}
+# Calculate the number of days in the date interval.
+num_days = difftime(end_date, start_date, units=c("days"));
+# Add 1 to the number of days to make the dates inclusive. For
+# example, if the user enters a date range of 2018-01-01 to
+# 2018-01-31, they likely expect the end date to be included.
+num_days = num_days + 1;
+if (num_days > 50) {
+ # We need to restrict date intervals since
+ # plots render tick marks for each day.
+ stop_err("Date intervals for plotting cannot exceed 50 days.");
+}
+# Display the total number of days in the Galaxy history item blurb.
+cat("Number of days in date interval: ", num_days, "\n");
+
+# Get the year from the start_date.
+year = get_year_from_date(start_date);
+# Get the DOY for start_date and end_date.
+start_date_doy = as.integer(strftime(start_date, format="%j"));
+end_date_doy = as.integer(strftime(end_date, format="%j"));
+# Get the ticks date labels for plots.
+ticks_and_labels = get_x_axis_ticks_and_labels(temperature_data_frame, prepend_end_doy_norm, append_start_doy_norm, date_interval=TRUE);
+ticks = c(unlist(ticks_and_labels[1]));
+date_labels = c(unlist(ticks_and_labels[2]));
+# All latitude values are the same, so get the value
+# for plots from the first row.
+latitude = temperature_data_frame$LATITUDE[1];
+
+input_files = list.files(path=input_dir, full.names=TRUE);
+for(input_file in input_files) {
+ temperature_data_frame = get_new_temperature_data_frame(input_file);
+ start_date_row = which(temperature_data_frame$DATE==start_date);
+ end_date_row = which(temperature_data_frame$DATE==end_date);
+ # Extract the date interval.
+ temperature_data_frame = temperature_data_frame[start_date_row:end_date_row,];
+ # Save the date interval data into an output file
+ # named the same as the input.
+ file_path = paste("output_data_dir", input_file, sep="/");
+ write.csv(temperature_data_frame, file=file_path, row.names=F);
+ # TODO: Save the dat interval plots...
+}
diff -r 000000000000 -r 4dccc60b3525 extract_ipm_date_interval.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_ipm_date_interval.xml Tue Jul 31 14:36:56 2018 -0400
@@ -0,0 +1,106 @@
+
+ from insect phenology model data
+
+ r-optparse
+
+ ipm_log.txt;
+if [[ $? -ne 0 ]]; then
+ cp ipm_log.txt '$error_file';
+ exit 1;
+fi]]>
+
+
+
+ len(value.split('-')[0])==4 and int(value.split('-')[0]) and len(value.split('-')[1])==2 and int(value.split('-')[1]) and len(value.split('-')[2])==2 and int(value.split('-')[2])
+
+
+ len(value.split('-')[0])==4 and int(value.split('-')[0]) and len(value.split('-')[1])==2 and int(value.split('-')[1]) and len(value.split('-')[2])==2 and int(value.split('-')[2])
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+Provides an agent-based stochastic model expressing stage-specific phenology and population dynamics for an insect species across geographic regions.
+
+-----
+
+**Required options**
+
+ * **Plot standard error** - add standard error lines to plot.
+
+
+ 10.3389/fphys.2016.00165
+ 10.1175/JTECH-D-11-00103.1
+ 10.7289/V5D21VHZ
+
+
diff -r 000000000000 -r 4dccc60b3525 utils.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utils.R Tue Jul 31 14:36:56 2018 -0400
@@ -0,0 +1,315 @@
+#!/usr/bin/env Rscript
+
+get_file_path = function(life_stage, base_name, life_stage_nymph=NULL, life_stage_adult=NULL) {
+ if (!is.null(life_stage_nymph)) {
+ lsi = get_life_stage_index(life_stage, life_stage_nymph=life_stage_nymph);
+ file_name = paste(lsi, tolower(life_stage_nymph), base_name, sep="_");
+ } else if (!is.null(life_stage_adult)) {
+ lsi = get_life_stage_index(life_stage, life_stage_adult=life_stage_adult);
+ file_name = paste(lsi, tolower(life_stage_adult), base_name, sep="_");
+ } else {
+ lsi = get_life_stage_index(life_stage);
+ file_name = paste(lsi, base_name, sep="_");
+ }
+ file_path = paste("output_plots_dir", file_name, sep="/");
+ return(file_path);
+}
+
+get_life_stage_index = function(life_stage, life_stage_nymph=NULL, life_stage_adult=NULL) {
+ # Name collection elements so that they
+ # are displayed in logical order.
+ if (life_stage=="Egg") {
+ lsi = "01";
+ } else if (life_stage=="Nymph") {
+ if (life_stage_nymph=="Young") {
+ lsi = "02";
+ } else if (life_stage_nymph=="Old") {
+ lsi = "03";
+ } else if (life_stage_nymph=="Total") {
+ lsi="04";
+ }
+ } else if (life_stage=="Adult") {
+ if (life_stage_adult=="Pre-vittelogenic") {
+ lsi = "05";
+ } else if (life_stage_adult=="Vittelogenic") {
+ lsi = "06";
+ } else if (life_stage_adult=="Diapausing") {
+ lsi = "07";
+ } else if (life_stage_adult=="Total") {
+ lsi = "08";
+ }
+ } else if (life_stage=="Total") {
+ lsi = "09";
+ }
+ return(lsi);
+}
+
+get_mean_and_std_error = function(p_replications, f1_replications, f2_replications) {
+ # P mean.
+ p_m = apply(p_replications, 1, mean);
+ # P standard error.
+ p_se = apply(p_replications, 1, sd) / sqrt(opt$replications);
+ # F1 mean.
+ f1_m = apply(f1_replications, 1, mean);
+ # F1 standard error.
+ f1_se = apply(f1_replications, 1, sd) / sqrt(opt$replications);
+ # F2 mean.
+ f2_m = apply(f2_replications, 1, mean);
+ # F2 standard error.
+ f2_se = apply(f2_replications, 1, sd) / sqrt(opt$replications);
+ return(list(p_m, p_se, f1_m, f1_se, f2_m, f2_se))
+}
+
+get_tick_index = function(index, last_tick, ticks, tick_labels, tick_sep) {
+ # The R code tries hard not to draw overlapping tick labels, and so
+ # will omit labels where they would abut or overlap previously drawn
+ # labels. This can result in, for example, every other tick being
+ # labelled. We'll keep track of the last tick to make sure all of
+ # the month labels are displayed, and missing ticks are restricted
+ # to Sundays which have no labels anyway.
+ if (last_tick==0) {
+ return(length(ticks)+1);
+ }
+ last_saved_tick = ticks[[length(ticks)]];
+ if (index-last_saved_tick 0) {
+ prepend_end_norm_row = which(temperature_data_frame$DOY==prepend_end_doy_norm);
+ } else {
+ prepend_end_norm_row = 0;
+ }
+ if (append_start_doy_norm > 0) {
+ append_start_norm_row = which(temperature_data_frame$DOY==append_start_doy_norm);
+ } else {
+ append_start_norm_row = 0;
+ }
+ num_rows = dim(temperature_data_frame)[1];
+ tick_labels = list();
+ ticks = list();
+ current_month_label = NULL;
+ last_tick = 0;
+ if (date_interval) {
+ tick_sep = 0;
+ } else {
+ tick_sep = 3;
+ }
+ for (i in 1:num_rows) {
+ # Get the year and month from the date which
+ # has the format YYYY-MM-DD.
+ date = format(temperature_data_frame$DATE[i]);
+ # Get the month label.
+ items = strsplit(date, "-")[[1]];
+ month = items[2];
+ month_label = month.abb[as.integer(month)];
+ day = as.integer(items[3]);
+ doy = as.integer(temperature_data_frame$DOY[i]);
+ # We're plotting the entire year, so ticks will
+ # occur on Sundays and the first of each month.
+ if (i == prepend_end_norm_row) {
+ # Add a tick for the end of the 30 year normnals data
+ # that was prepended to the year-to-date data.
+ label_str = "End prepended 30 year normals";
+ tick_index = get_tick_index(i, last_tick, ticks, tick_labels, tick_sep)
+ ticks[tick_index] = i;
+ if (date_interval) {
+ # Append the day to label_str
+ tick_labels[tick_index] = paste(label_str, day, sep=" ");
+ } else {
+ tick_labels[tick_index] = label_str;
+ }
+ last_tick = i;
+ } else if (doy == append_start_doy_norm) {
+ # Add a tick for the start of the 30 year normnals data
+ # that was appended to the year-to-date data.
+ label_str = "Start appended 30 year normals";
+ tick_index = get_tick_index(i, last_tick, ticks, tick_labels, tick_sep)
+ ticks[tick_index] = i;
+ if (!identical(current_month_label, month_label)) {
+ # Append the month to label_str.
+ label_str = paste(label_str, month_label, spe=" ");
+ current_month_label = month_label;
+ }
+ if (date_interval) {
+ # Append the day to label_str
+ label_str = paste(label_str, day, sep=" ");
+ }
+ tick_labels[tick_index] = label_str;
+ last_tick = i;
+ } else if (i==num_rows) {
+ # Add a tick for the last day of the year.
+ label_str = "";
+ tick_index = get_tick_index(i, last_tick, ticks, tick_labels, tick_sep)
+ ticks[tick_index] = i;
+ if (!identical(current_month_label, month_label)) {
+ # Append the month to label_str.
+ label_str = month_label;
+ current_month_label = month_label;
+ }
+ if (date_interval) {
+ # Append the day to label_str
+ label_str = paste(label_str, day, sep=" ");
+ }
+ tick_labels[tick_index] = label_str;
+ } else {
+ if (!identical(current_month_label, month_label)) {
+ # Add a tick for the month.
+ tick_index = get_tick_index(i, last_tick, ticks, tick_labels, tick_sep)
+ ticks[tick_index] = i;
+ if (date_interval) {
+ # Append the day to the month.
+ tick_labels[tick_index] = paste(month_label, day, sep=" ");
+ } else {
+ tick_labels[tick_index] = month_label;
+ }
+ current_month_label = month_label;
+ last_tick = i;
+ }
+ tick_index = get_tick_index(i, last_tick, ticks, tick_labels, tick_sep)
+ if (!is.null(tick_index)) {
+ if (date_interval) {
+ # Add a tick for every day. The first tick is the
+ # month label, so add a tick only if i is not 1
+ if (i>1 & day>1) {
+ tick_index = get_tick_index(i, last_tick, ticks, tick_labels, tick_sep)
+ ticks[tick_index] = i;
+ # Add the day as the label.
+ tick_labels[tick_index] = day;
+ last_tick = i;
+ }
+ } else {
+ # Get the day.
+ day = weekdays(as.Date(date));
+ if (day=="Sunday") {
+ # Add a tick if we're on a Sunday.
+ ticks[tick_index] = i;
+ # Add a blank month label so it is not displayed.
+ tick_labels[tick_index] = "";
+ last_tick = i;
+ }
+ }
+ }
+ }
+ }
+ return(list(ticks, tick_labels));
+}
+
+render_chart = function(ticks, date_labels, chart_type, plot_std_error, insect, location, latitude, start_date, end_date, days, maxval,
+ replications, life_stage, group, group_std_error, group2=NULL, group2_std_error=NULL, group3=NULL, group3_std_error=NULL,
+ life_stages_adult=NULL, life_stages_nymph=NULL) {
+ if (chart_type=="pop_size_by_life_stage") {
+ if (life_stage=="Total") {
+ title = paste(insect, ": Reps", replications, ":", life_stage, "Pop :", location, ": Lat", latitude, ":", start_date, "-", end_date, sep=" ");
+ legend_text = c("Egg", "Nymph", "Adult");
+ columns = c(4, 2, 1);
+ plot(days, group, main=title, type="l", ylim=c(0, maxval), axes=FALSE, lwd=2, xlab="", ylab="", cex=3, cex.lab=3, cex.axis=3, cex.main=3);
+ legend("topleft", legend_text, lty=c(1, 1, 1), col=columns, cex=3);
+ lines(days, group2, lwd=2, lty=1, col=2);
+ lines(days, group3, lwd=2, lty=1, col=4);
+ axis(side=1, at=ticks, labels=date_labels, las=2, font.axis=3, xpd=TRUE, cex=3, cex.lab=3, cex.axis=3, cex.main=3);
+ axis(side=2, font.axis=3, xpd=TRUE, cex=3, cex.lab=3, cex.axis=3, cex.main=3);
+ if (plot_std_error=="yes") {
+ # Standard error for group.
+ lines(days, group+group_std_error, lty=2);
+ lines(days, group-group_std_error, lty=2);
+ # Standard error for group2.
+ lines(days, group2+group2_std_error, col=2, lty=2);
+ lines(days, group2-group2_std_error, col=2, lty=2);
+ # Standard error for group3.
+ lines(days, group3+group3_std_error, col=4, lty=2);
+ lines(days, group3-group3_std_error, col=4, lty=2);
+ }
+ } else {
+ if (life_stage=="Egg") {
+ title = paste(insect, ": Reps", replications, ":", life_stage, "Pop :", location, ": Lat", latitude, ":", start_date, "-", end_date, sep=" ");
+ legend_text = c(life_stage);
+ columns = c(4);
+ } else if (life_stage=="Nymph") {
+ stage = paste(life_stages_nymph, "Nymph Pop :", sep=" ");
+ title = paste(insect, ": Reps", replications, ":", stage, location, ": Lat", latitude, ":", start_date, "-", end_date, sep=" ");
+ legend_text = c(paste(life_stages_nymph, life_stage, sep=" "));
+ columns = c(2);
+ } else if (life_stage=="Adult") {
+ stage = paste(life_stages_adult, "Adult Pop", sep=" ");
+ title = paste(insect, ": Reps", replications, ":", stage, location, ": Lat", latitude, ":", start_date, "-", end_date, sep=" ");
+ legend_text = c(paste(life_stages_adult, life_stage, sep=" "));
+ columns = c(1);
+ }
+ plot(days, group, main=title, type="l", ylim=c(0, maxval), axes=FALSE, lwd=2, xlab="", ylab="", cex=3, cex.lab=3, cex.axis=3, cex.main=3);
+ legend("topleft", legend_text, lty=c(1), col="black", cex=3);
+ axis(side=1, at=ticks, labels=date_labels, las=2, font.axis=3, xpd=TRUE, cex=3, cex.lab=3, cex.axis=3, cex.main=3);
+ axis(side=2, font.axis=3, xpd=TRUE, cex=3, cex.lab=3, cex.axis=3, cex.main=3);
+ if (plot_std_error=="yes") {
+ # Standard error for group.
+ lines(days, group+group_std_error, lty=2);
+ lines(days, group-group_std_error, lty=2);
+ }
+ }
+ } else if (chart_type=="pop_size_by_generation") {
+ if (life_stage=="Total") {
+ title_str = ": Total Pop by Gen :";
+ } else if (life_stage=="Egg") {
+ title_str = ": Egg Pop by Gen :";
+ } else if (life_stage=="Nymph") {
+ title_str = paste(":", life_stages_nymph, "Nymph Pop by Gen", ":", sep=" ");
+ } else if (life_stage=="Adult") {
+ title_str = paste(":", life_stages_adult, "Adult Pop by Gen", ":", sep=" ");
+ }
+ title = paste(insect, ": Reps", replications, title_str, location, ": Lat", latitude, ":", start_date, "-", end_date, sep=" ");
+ legend_text = c("P", "F1", "F2");
+ columns = c(1, 2, 4);
+ plot(days, group, main=title, type="l", ylim=c(0, maxval), axes=FALSE, lwd=2, xlab="", ylab="", cex=3, cex.lab=3, cex.axis=3, cex.main=3);
+ legend("topleft", legend_text, lty=c(1, 1, 1), col=columns, cex=3);
+ lines(days, group2, lwd=2, lty=1, col=2);
+ lines(days, group3, lwd=2, lty=1, col=4);
+ axis(side=1, at=ticks, labels=date_labels, las=2, font.axis=3, xpd=TRUE, cex=3, cex.lab=3, cex.axis=3, cex.main=3);
+ axis(side=2, font.axis=3, xpd=TRUE, cex=3, cex.lab=3, cex.axis=3, cex.main=3);
+ if (plot_std_error=="yes") {
+ # Standard error for group.
+ lines(days, group+group_std_error, lty=2);
+ lines(days, group-group_std_error, lty=2);
+ # Standard error for group2.
+ lines(days, group2+group2_std_error, col=2, lty=2);
+ lines(days, group2-group2_std_error, col=2, lty=2);
+ # Standard error for group3.
+ lines(days, group3+group3_std_error, col=4, lty=2);
+ lines(days, group3-group3_std_error, col=4, lty=2);
+ }
+ }
+}
+
+stop_err = function(msg) {
+ cat(msg, file=stderr());
+ quit(save="no", status=1);
+}
+
+validate_date = function(date_str) {
+ valid_date = as.Date(date_str, format="%Y-%m-%d");
+ if( class(valid_date)=="try-error" || is.na(valid_date)) {
+ msg = paste("Invalid date: ", date_str, ", valid date format is yyyy-mm-dd.", sep="");
+ stop_err(msg);
+ }
+ return(valid_date);
+}