Mercurial > repos > greg > extract_ipm_date_interval
changeset 0:4dccc60b3525 draft
Uploaded
author | greg |
---|---|
date | Tue, 31 Jul 2018 14:36:56 -0400 |
parents | |
children | 0764ac25c68f |
files | .shed.yml extract_ipm_date_interval.R extract_ipm_date_interval.xml utils.R |
diffstat | 4 files changed, 511 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.shed.yml Tue Jul 31 14:36:56 2018 -0400 @@ -0,0 +1,13 @@ +name: extract_ipm_date_interval +owner: greg +description: | + Contains a tool that extracts a date interval from the dataset collection produced by the insect_phenology_model tool. +homepage_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/entomology/extract_ipm_date_interval +long_description: | + Contains a tool that extracts a date interval from the (data) dataset collection produced by the insect_phenology_model + tool and zooms into the interval by producing plots with daily ticks on the x axis instead of weekly) tick marks. The + (data) dataset collection consisting of the restricted date interval is also produced. +remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/entomology/extract_ipm_date_interval +type: unrestricted +categories: +- Entomology
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extract_ipm_date_interval.R Tue Jul 31 14:36:56 2018 -0400 @@ -0,0 +1,77 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("data.table")) +suppressPackageStartupMessages(library("optparse")) + +option_list <- list( + make_option(c("--input_dir"), action="store", dest="input_dir", help="Directory containing .csv outputs from insect_phenology_model"), + make_option(c("--start_date"), action="store", dest="start_date", default=NULL, help="Start date for custom date interval"), + make_option(c("--script_dir"), action="store", dest="script_dir", help="R script source directory") +) + +parser <- OptionParser(usage="%prog [options] file", option_list=option_list); +args <- parse_args(parser, positional_arguments=TRUE); +opt <- args$options; + +get_new_temperature_data_frame = function(input_file) { + temperature_data_frame = read.csv(file=input_file, header=T, strip.white=TRUE, stringsAsFactors=FALSE, sep=","); + colnames(temperature_data_frame) = c("LATITUDE", "LONGITUDE", "DATE", "DOY", "TMIN", "TMAX"); + return(temperature_data_frame); +} + +# Import the shared utility functions. +utils_path <- paste(opt$script_dir, "utils.R", sep="/"); +source(utils_path); + +# FIXME: currently custom date fields are free text, but +# Galaxy should soon include support for a date selector +# at which point this tool should be enhanced to use it. +# Validate start_date. + +# Calaculate the number of days in the date interval. +start_date = validate_date(opt$start_date); +# Validate end_date. +end_date = validate_date(opt$end_date); +if (start_date >= end_date) { + stop_err("The start date must be between 1 and 50 days before the end date when setting date intervals for plots."); +} +# Calculate the number of days in the date interval. +num_days = difftime(end_date, start_date, units=c("days")); +# Add 1 to the number of days to make the dates inclusive. For +# example, if the user enters a date range of 2018-01-01 to +# 2018-01-31, they likely expect the end date to be included. +num_days = num_days + 1; +if (num_days > 50) { + # We need to restrict date intervals since + # plots render tick marks for each day. + stop_err("Date intervals for plotting cannot exceed 50 days."); +} +# Display the total number of days in the Galaxy history item blurb. +cat("Number of days in date interval: ", num_days, "\n"); + +# Get the year from the start_date. +year = get_year_from_date(start_date); +# Get the DOY for start_date and end_date. +start_date_doy = as.integer(strftime(start_date, format="%j")); +end_date_doy = as.integer(strftime(end_date, format="%j")); +# Get the ticks date labels for plots. +ticks_and_labels = get_x_axis_ticks_and_labels(temperature_data_frame, prepend_end_doy_norm, append_start_doy_norm, date_interval=TRUE); +ticks = c(unlist(ticks_and_labels[1])); +date_labels = c(unlist(ticks_and_labels[2])); +# All latitude values are the same, so get the value +# for plots from the first row. +latitude = temperature_data_frame$LATITUDE[1]; + +input_files = list.files(path=input_dir, full.names=TRUE); +for(input_file in input_files) { + temperature_data_frame = get_new_temperature_data_frame(input_file); + start_date_row = which(temperature_data_frame$DATE==start_date); + end_date_row = which(temperature_data_frame$DATE==end_date); + # Extract the date interval. + temperature_data_frame = temperature_data_frame[start_date_row:end_date_row,]; + # Save the date interval data into an output file + # named the same as the input. + file_path = paste("output_data_dir", input_file, sep="/"); + write.csv(temperature_data_frame, file=file_path, row.names=F); + # TODO: Save the dat interval plots... +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extract_ipm_date_interval.xml Tue Jul 31 14:36:56 2018 -0400 @@ -0,0 +1,106 @@ +<tool id="ipm_date_interval" name="Extract date interval" version="1.1.0"> + <description>from insect phenology model data</description> + <requirements> + <requirement type="package" version="1.4.4">r-optparse</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ +#import os +#set input_dir = 'input_dir' +#set output_data_dir = "output_data_dir" +#set output_plots_dir = "output_plots_dir" +#set error_file = $os.path.join($output_data_dir, "04_combined_generations.csv") +mkdir $input_dir && +mkdir output_data_dir && +mkdir output_plots_dir && +#for $i in $input: + #set filename = $i.file_name + #set name = $i.name + ln -s $filename $input_dir/$name && +#end for +Rscript '$__tool_directory__/extract_ipm_date_interval.R' +--end_date '$end_date' +--input_dir '$input_dir' +--plot_std_error $plot_std_error +--script_dir '$__tool_directory__' +--start_date '$start_date' +&>ipm_log.txt; +if [[ $? -ne 0 ]]; then + cp ipm_log.txt '$error_file'; + exit 1; +fi]]></command> + <inputs> + <param name="input" type="data_collection" format="csv" collection_type="list" label="Insect phenology model data files" /> + <param name="start_date" type="text" value="" label="Start date" help="Format must be yyyy-mm-dd"> + <validator type="expression" message="Date must have the format yyyy-mm-dd">len(value.split('-')[0])==4 and int(value.split('-')[0]) and len(value.split('-')[1])==2 and int(value.split('-')[1]) and len(value.split('-')[2])==2 and int(value.split('-')[2])</validator> + </param> + <param name="end_date" type="text" value="" label="End date" help="Format must be yyyy-mm-dd"> + <validator type="expression" message="Date must have the format yyyy-mm-dd">len(value.split('-')[0])==4 and int(value.split('-')[0]) and len(value.split('-')[1])==2 and int(value.split('-')[1]) and len(value.split('-')[2])==2 and int(value.split('-')[2])</validator> + </param> + <param name="plot_std_error" type="select" label="Plot standard error?"> + <option value="yes" selected="True">Yes</option> + <option value="no">No</option> + </param> + </inputs> + <outputs> + <collection name="output_data_collection" type="list" label="${tool.name} (data), on ${on_string}"> + <discover_datasets pattern="__name__" directory="output_data_dir" format="csv"/> + </collection> + <collection name="output_plots_collection" type="list" label="${tool.name} (plots), on ${on_string}"> + <discover_datasets pattern="__name__" directory="output_plots_dir" format="pdf"/> + </collection> + </outputs> + <tests> + <test> + <param name="input"> + <collection type="list"> + <element name="04_combined_generations.csv"/> + </collection> + </param> + <param name="start_date" value="2017-04-01"/> + <param name="end_date" value="2017-04-15"/> + <output_collection name="output_data_collection" type="list"> + <element name="04_combined_generations.csv" file="output_combined6.csv" ftype="csv" compare="contains"/> + </output_collection> + <!-- + <output_collection name="output_plots_collection" type="list"> + <element name="02_young_nymph_pop.pdf" file="plot.pdf" ftype="pdf" compare="contains"/> + <element name="05_pre-vittelogenic_adult_pop.pdf" file="plot.pdf" ftype="pdf" compare="contains"/> + </output_collection> + --> + </test> + <test> + <param name="input"> + <collection type="list"> + <element name="04_combined_generations.csv"/> + </collection> + </param> + <param name="start_date" value="2017-01-01"/> + <param name="end_date" value="2017-01-15"/> + <output_collection name="output_data_collection" type="list"> + <element name="04_combined_generations.csv" file="output_combined7.csv" ftype="csv" compare="contains"/> + </output_collection> + <!-- + <output_collection name="output_plots_collection" type="list"> + <element name="02_young_nymph_pop.pdf" file="plot.pdf" ftype="pdf" compare="contains"/> + <element name="05_pre-vittelogenic_adult_pop.pdf" file="plot.pdf" ftype="pdf" compare="contains"/> + </output_collection> + --> + </test> + </tests> + <help> +**What it does** + +Provides an agent-based stochastic model expressing stage-specific phenology and population dynamics for an insect species across geographic regions. + +----- + +**Required options** + + * **Plot standard error** - add standard error lines to plot. + </help> + <citations> + <citation type="doi">10.3389/fphys.2016.00165</citation> + <citation type="doi">10.1175/JTECH-D-11-00103.1</citation> + <citation type="doi">10.7289/V5D21VHZ</citation> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils.R Tue Jul 31 14:36:56 2018 -0400 @@ -0,0 +1,315 @@ +#!/usr/bin/env Rscript + +get_file_path = function(life_stage, base_name, life_stage_nymph=NULL, life_stage_adult=NULL) { + if (!is.null(life_stage_nymph)) { + lsi = get_life_stage_index(life_stage, life_stage_nymph=life_stage_nymph); + file_name = paste(lsi, tolower(life_stage_nymph), base_name, sep="_"); + } else if (!is.null(life_stage_adult)) { + lsi = get_life_stage_index(life_stage, life_stage_adult=life_stage_adult); + file_name = paste(lsi, tolower(life_stage_adult), base_name, sep="_"); + } else { + lsi = get_life_stage_index(life_stage); + file_name = paste(lsi, base_name, sep="_"); + } + file_path = paste("output_plots_dir", file_name, sep="/"); + return(file_path); +} + +get_life_stage_index = function(life_stage, life_stage_nymph=NULL, life_stage_adult=NULL) { + # Name collection elements so that they + # are displayed in logical order. + if (life_stage=="Egg") { + lsi = "01"; + } else if (life_stage=="Nymph") { + if (life_stage_nymph=="Young") { + lsi = "02"; + } else if (life_stage_nymph=="Old") { + lsi = "03"; + } else if (life_stage_nymph=="Total") { + lsi="04"; + } + } else if (life_stage=="Adult") { + if (life_stage_adult=="Pre-vittelogenic") { + lsi = "05"; + } else if (life_stage_adult=="Vittelogenic") { + lsi = "06"; + } else if (life_stage_adult=="Diapausing") { + lsi = "07"; + } else if (life_stage_adult=="Total") { + lsi = "08"; + } + } else if (life_stage=="Total") { + lsi = "09"; + } + return(lsi); +} + +get_mean_and_std_error = function(p_replications, f1_replications, f2_replications) { + # P mean. + p_m = apply(p_replications, 1, mean); + # P standard error. + p_se = apply(p_replications, 1, sd) / sqrt(opt$replications); + # F1 mean. + f1_m = apply(f1_replications, 1, mean); + # F1 standard error. + f1_se = apply(f1_replications, 1, sd) / sqrt(opt$replications); + # F2 mean. + f2_m = apply(f2_replications, 1, mean); + # F2 standard error. + f2_se = apply(f2_replications, 1, sd) / sqrt(opt$replications); + return(list(p_m, p_se, f1_m, f1_se, f2_m, f2_se)) +} + +get_tick_index = function(index, last_tick, ticks, tick_labels, tick_sep) { + # The R code tries hard not to draw overlapping tick labels, and so + # will omit labels where they would abut or overlap previously drawn + # labels. This can result in, for example, every other tick being + # labelled. We'll keep track of the last tick to make sure all of + # the month labels are displayed, and missing ticks are restricted + # to Sundays which have no labels anyway. + if (last_tick==0) { + return(length(ticks)+1); + } + last_saved_tick = ticks[[length(ticks)]]; + if (index-last_saved_tick<tick_sep) { + last_saved_month = tick_labels[[length(tick_labels)]]; + if (last_saved_month=="") { + # We're safe overwriting a tick + # with no label (i.e., a Sunday tick). + return(length(ticks)); + } else { + # Don't eliminate a Month label. + return(NULL); + } + } + return(length(ticks)+1); +} + +get_total_days = function(is_leap_year) { + # Get the total number of days in the current year. + if (is_leap_year) { + return(366); + } else { + return(365); + } +} + +get_x_axis_ticks_and_labels = function(temperature_data_frame, prepend_end_doy_norm, append_start_doy_norm, date_interval=FALSE) { + # Generate a list of ticks and labels for plotting the x axis. + if (prepend_end_doy_norm > 0) { + prepend_end_norm_row = which(temperature_data_frame$DOY==prepend_end_doy_norm); + } else { + prepend_end_norm_row = 0; + } + if (append_start_doy_norm > 0) { + append_start_norm_row = which(temperature_data_frame$DOY==append_start_doy_norm); + } else { + append_start_norm_row = 0; + } + num_rows = dim(temperature_data_frame)[1]; + tick_labels = list(); + ticks = list(); + current_month_label = NULL; + last_tick = 0; + if (date_interval) { + tick_sep = 0; + } else { + tick_sep = 3; + } + for (i in 1:num_rows) { + # Get the year and month from the date which + # has the format YYYY-MM-DD. + date = format(temperature_data_frame$DATE[i]); + # Get the month label. + items = strsplit(date, "-")[[1]]; + month = items[2]; + month_label = month.abb[as.integer(month)]; + day = as.integer(items[3]); + doy = as.integer(temperature_data_frame$DOY[i]); + # We're plotting the entire year, so ticks will + # occur on Sundays and the first of each month. + if (i == prepend_end_norm_row) { + # Add a tick for the end of the 30 year normnals data + # that was prepended to the year-to-date data. + label_str = "End prepended 30 year normals"; + tick_index = get_tick_index(i, last_tick, ticks, tick_labels, tick_sep) + ticks[tick_index] = i; + if (date_interval) { + # Append the day to label_str + tick_labels[tick_index] = paste(label_str, day, sep=" "); + } else { + tick_labels[tick_index] = label_str; + } + last_tick = i; + } else if (doy == append_start_doy_norm) { + # Add a tick for the start of the 30 year normnals data + # that was appended to the year-to-date data. + label_str = "Start appended 30 year normals"; + tick_index = get_tick_index(i, last_tick, ticks, tick_labels, tick_sep) + ticks[tick_index] = i; + if (!identical(current_month_label, month_label)) { + # Append the month to label_str. + label_str = paste(label_str, month_label, spe=" "); + current_month_label = month_label; + } + if (date_interval) { + # Append the day to label_str + label_str = paste(label_str, day, sep=" "); + } + tick_labels[tick_index] = label_str; + last_tick = i; + } else if (i==num_rows) { + # Add a tick for the last day of the year. + label_str = ""; + tick_index = get_tick_index(i, last_tick, ticks, tick_labels, tick_sep) + ticks[tick_index] = i; + if (!identical(current_month_label, month_label)) { + # Append the month to label_str. + label_str = month_label; + current_month_label = month_label; + } + if (date_interval) { + # Append the day to label_str + label_str = paste(label_str, day, sep=" "); + } + tick_labels[tick_index] = label_str; + } else { + if (!identical(current_month_label, month_label)) { + # Add a tick for the month. + tick_index = get_tick_index(i, last_tick, ticks, tick_labels, tick_sep) + ticks[tick_index] = i; + if (date_interval) { + # Append the day to the month. + tick_labels[tick_index] = paste(month_label, day, sep=" "); + } else { + tick_labels[tick_index] = month_label; + } + current_month_label = month_label; + last_tick = i; + } + tick_index = get_tick_index(i, last_tick, ticks, tick_labels, tick_sep) + if (!is.null(tick_index)) { + if (date_interval) { + # Add a tick for every day. The first tick is the + # month label, so add a tick only if i is not 1 + if (i>1 & day>1) { + tick_index = get_tick_index(i, last_tick, ticks, tick_labels, tick_sep) + ticks[tick_index] = i; + # Add the day as the label. + tick_labels[tick_index] = day; + last_tick = i; + } + } else { + # Get the day. + day = weekdays(as.Date(date)); + if (day=="Sunday") { + # Add a tick if we're on a Sunday. + ticks[tick_index] = i; + # Add a blank month label so it is not displayed. + tick_labels[tick_index] = ""; + last_tick = i; + } + } + } + } + } + return(list(ticks, tick_labels)); +} + +render_chart = function(ticks, date_labels, chart_type, plot_std_error, insect, location, latitude, start_date, end_date, days, maxval, + replications, life_stage, group, group_std_error, group2=NULL, group2_std_error=NULL, group3=NULL, group3_std_error=NULL, + life_stages_adult=NULL, life_stages_nymph=NULL) { + if (chart_type=="pop_size_by_life_stage") { + if (life_stage=="Total") { + title = paste(insect, ": Reps", replications, ":", life_stage, "Pop :", location, ": Lat", latitude, ":", start_date, "-", end_date, sep=" "); + legend_text = c("Egg", "Nymph", "Adult"); + columns = c(4, 2, 1); + plot(days, group, main=title, type="l", ylim=c(0, maxval), axes=FALSE, lwd=2, xlab="", ylab="", cex=3, cex.lab=3, cex.axis=3, cex.main=3); + legend("topleft", legend_text, lty=c(1, 1, 1), col=columns, cex=3); + lines(days, group2, lwd=2, lty=1, col=2); + lines(days, group3, lwd=2, lty=1, col=4); + axis(side=1, at=ticks, labels=date_labels, las=2, font.axis=3, xpd=TRUE, cex=3, cex.lab=3, cex.axis=3, cex.main=3); + axis(side=2, font.axis=3, xpd=TRUE, cex=3, cex.lab=3, cex.axis=3, cex.main=3); + if (plot_std_error=="yes") { + # Standard error for group. + lines(days, group+group_std_error, lty=2); + lines(days, group-group_std_error, lty=2); + # Standard error for group2. + lines(days, group2+group2_std_error, col=2, lty=2); + lines(days, group2-group2_std_error, col=2, lty=2); + # Standard error for group3. + lines(days, group3+group3_std_error, col=4, lty=2); + lines(days, group3-group3_std_error, col=4, lty=2); + } + } else { + if (life_stage=="Egg") { + title = paste(insect, ": Reps", replications, ":", life_stage, "Pop :", location, ": Lat", latitude, ":", start_date, "-", end_date, sep=" "); + legend_text = c(life_stage); + columns = c(4); + } else if (life_stage=="Nymph") { + stage = paste(life_stages_nymph, "Nymph Pop :", sep=" "); + title = paste(insect, ": Reps", replications, ":", stage, location, ": Lat", latitude, ":", start_date, "-", end_date, sep=" "); + legend_text = c(paste(life_stages_nymph, life_stage, sep=" ")); + columns = c(2); + } else if (life_stage=="Adult") { + stage = paste(life_stages_adult, "Adult Pop", sep=" "); + title = paste(insect, ": Reps", replications, ":", stage, location, ": Lat", latitude, ":", start_date, "-", end_date, sep=" "); + legend_text = c(paste(life_stages_adult, life_stage, sep=" ")); + columns = c(1); + } + plot(days, group, main=title, type="l", ylim=c(0, maxval), axes=FALSE, lwd=2, xlab="", ylab="", cex=3, cex.lab=3, cex.axis=3, cex.main=3); + legend("topleft", legend_text, lty=c(1), col="black", cex=3); + axis(side=1, at=ticks, labels=date_labels, las=2, font.axis=3, xpd=TRUE, cex=3, cex.lab=3, cex.axis=3, cex.main=3); + axis(side=2, font.axis=3, xpd=TRUE, cex=3, cex.lab=3, cex.axis=3, cex.main=3); + if (plot_std_error=="yes") { + # Standard error for group. + lines(days, group+group_std_error, lty=2); + lines(days, group-group_std_error, lty=2); + } + } + } else if (chart_type=="pop_size_by_generation") { + if (life_stage=="Total") { + title_str = ": Total Pop by Gen :"; + } else if (life_stage=="Egg") { + title_str = ": Egg Pop by Gen :"; + } else if (life_stage=="Nymph") { + title_str = paste(":", life_stages_nymph, "Nymph Pop by Gen", ":", sep=" "); + } else if (life_stage=="Adult") { + title_str = paste(":", life_stages_adult, "Adult Pop by Gen", ":", sep=" "); + } + title = paste(insect, ": Reps", replications, title_str, location, ": Lat", latitude, ":", start_date, "-", end_date, sep=" "); + legend_text = c("P", "F1", "F2"); + columns = c(1, 2, 4); + plot(days, group, main=title, type="l", ylim=c(0, maxval), axes=FALSE, lwd=2, xlab="", ylab="", cex=3, cex.lab=3, cex.axis=3, cex.main=3); + legend("topleft", legend_text, lty=c(1, 1, 1), col=columns, cex=3); + lines(days, group2, lwd=2, lty=1, col=2); + lines(days, group3, lwd=2, lty=1, col=4); + axis(side=1, at=ticks, labels=date_labels, las=2, font.axis=3, xpd=TRUE, cex=3, cex.lab=3, cex.axis=3, cex.main=3); + axis(side=2, font.axis=3, xpd=TRUE, cex=3, cex.lab=3, cex.axis=3, cex.main=3); + if (plot_std_error=="yes") { + # Standard error for group. + lines(days, group+group_std_error, lty=2); + lines(days, group-group_std_error, lty=2); + # Standard error for group2. + lines(days, group2+group2_std_error, col=2, lty=2); + lines(days, group2-group2_std_error, col=2, lty=2); + # Standard error for group3. + lines(days, group3+group3_std_error, col=4, lty=2); + lines(days, group3-group3_std_error, col=4, lty=2); + } + } +} + +stop_err = function(msg) { + cat(msg, file=stderr()); + quit(save="no", status=1); +} + +validate_date = function(date_str) { + valid_date = as.Date(date_str, format="%Y-%m-%d"); + if( class(valid_date)=="try-error" || is.na(valid_date)) { + msg = paste("Invalid date: ", date_str, ", valid date format is yyyy-mm-dd.", sep=""); + stop_err(msg); + } + return(valid_date); +}