changeset:   0:6c1e450b02c3 (draft, default, tip)
author:      stevecassidy
date:        Sat, 11 Mar 2017 21:35:38 -0500
summary:     planemo upload commit 72cee9103c0ae4acb5794afaed179bea2c729f2c-dirty
files:       ParseEval.xml parseval.R r_wrapper.sh
diffstat:    3 files changed, 600 insertions(+), 0 deletions(-)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ParseEval.xml	Sat Mar 11 21:35:38 2017 -0500
@@ -0,0 +1,133 @@
+<tool id="ParseEval" name="ParseEval" version="1.0.0">
+
+  <description>Analyse articulatory data</description>
+
+  <requirements>
+    <requirement type="set_environment">R_SCRIPT_PATH</requirement>
+    <requirement type="package" version="3.1.2">R</requirement>
+    <requirement type="package" version="0.20.33">R_lattice</requirement>
+    <requirement type="package" version="0.6.26">R_latticeExtra</requirement>
+    <requirement type="package" version="2.0.0">R_gridExtra</requirement>
+  </requirements>
+
+  <command interpreter="sh">
+    r_wrapper.sh $script_file $__tool_directory__
+  </command>
+
+  <inputs>
+    <param name="c1a" type="float" size="10" value="0" label="Gestural plateau duration" help="Articulatory data for the immediately pre-vocalic consonant" />
+    <param name="c1b" type="float" size="10" value="0" label="Standard Deviation" help="Articulatory data for the immediately pre-vocalic consonant" />
+    <param name="c2a" type="float" size="10" value="0" label="Gestural plateau duration" help="Articulatory data for the second consonant in tri-consonantal or the first consonant in bi-consonantal clusters" />
+    <param name="c2b" type="float" size="10" value="0" label="Standard Deviation" help="Articulatory data for the second consonant in tri-consonantal or the first consonant in bi-consonantal clusters" />
+    <param name="c3a" type="float" size="10" value="0" label="Gestural plateau duration" help="Articulatory data for the initial consonant in tri-consonantal clusters" />
+    <param name="c3b" type="float" size="10" value="0" label="Standard Deviation" help="Articulatory data for the initial consonant in tri-consonantal clusters" />
+    <param name="cipi12a" type="float" size="10" value="0" label="Inter-plateau duration" help="Duration of the interval between the plateaus of the first two consonants in tri-consonantal clusters" />
+    <param name="cipi12b" type="float" size="10" value="0" label="Standard Deviation" help="Duration of the interval between the plateaus of the first two consonants in tri-consonantal clusters" />
+    <param name="cipi23a" type="float" size="10" value="0" label="Inter-plateau duration" help="Duration of the interval between the plateaus of the first two consonants in bi-consonantal clusters, or between the middle and the pre-vocalic consonants in tri-consonantal clusters" />
+    <param name="cipi23b" type="float" size="10" value="0" label="Standard Deviation" help="Duration of the interval between the plateaus of the first two consonants in bi-consonantal clusters, or between the middle and the pre-vocalic consonants in tri-consonantal clusters" />
+    <param name="voweld" type="integer" size="10" value="0" label="Articulatory duration of the vocalic gesture" />
+    <param name="anchor" type="integer" size="10" optional="true" label="Number of stepwise increases in variability simulated by the model" />
+    <param name="var_anchor" type="integer" size="10" optional="true" label="Size (in milliseconds) of each stepwise increase in variability" />
+    <param name="types" type="integer" size="10" value="2" label="Number of different word types" help="Must be either 2 for bi-consonantal clusters or 3 for tri-consonantal clusters">
+      <validator type="in_range" min="2" max="3" message="Number of different word types must be either 2 or 3" />
+    </param>
+    <param name="words" type="integer" size="10" value="12" label="Number of words (stimuli) for all word types" help="Must be divisible by the value of types above without a remainder" />
+    <param name="simN" type="integer" size="10" optional="true" label="Number of simulation runs over which the hit rate is calculated" />
+    <param name="RE_rsd" type="float" size="10" value="0" label="Relative standard deviation of the right-edge-to-anchor interval" />
+    <param name="CC_rsd" type="float" size="10" value="0" label="Relative standard deviation of the center-to-anchor interval" />
+    <param name="LE_rsd" type="float" size="10" value="0" label="Relative standard deviation of the left-edge-to-anchor interval" />
+    <param name="job_name" type="text" size="25" label="Supply a name for the outputs to remind you what they contain" value="ParseEval"/>
+  </inputs>
+
+  <outputs>
+    <data format="html" name="html_file" label="${job_name}"/>
+  </outputs>
+
+  <configfiles>
+    <configfile name="script_file">
+
+if(${words} %% ${types} != 0) {
+    stop("Number of words must be divisible by the number of types without a remainder")
+}
+
+require(lattice, quietly=T)
+require(latticeExtra, quietly=T)
+require(gridExtra, quietly=T)
+
+args <- commandArgs(trailingOnly = TRUE)
+source(file.path(args[1], "parseval.R"))
+
+if(!file.exists("${html_file.files_path}")) {
+    dir.create("${html_file.files_path}")
+}
+
+write("<pre>", file="${html_file}", append=TRUE)
+sink("${html_file}", append=TRUE)
+
+tryCatch({
+    set.seed(2954)
+    parse<-parseEval(c1=c(${c1a},${c1b}), c2=c(${c2a},${c2b}), c3=c(${c3a},${c3b}), cipi12=c(${cipi12a},${cipi12b}), cipi23=c(${cipi23a},${cipi23b}), voweld=${voweld}, anchor=${anchor}, var_anchor=${var_anchor}, types=${types}, words=${words}, simN=${simN}, RE_rsd=${RE_rsd}, CC_rsd=${CC_rsd}, LE_rsd=${LE_rsd})
+}, error = function(e) {
+    stop("ParseEval produced an error with the given input parameters. Check the inputs to make sure they are correct.")
+})
+
+sink()
+
+write(paste("<br />Type of Simulation: ", parse[[2]], sep=""), file="${html_file}", append=TRUE)
+write("</pre><br />", file="${html_file}", append=TRUE)
+
+write("<a href='simplex_plots.pdf'>Simple Onset Hypothesis Plots</a><br />", file="${html_file}", append=TRUE)
+write("<a href='complex_plots.pdf'>Complex Onset Hypothesis Plots</a>", file="${html_file}", append=TRUE)
+
+p1_s<-xyplot((parse_s*100)~as.factor(anchorindex), groups=edge, type="o", pch=20, cex=0.4, lwd=.5, main="Change in stability pattern as a function of anchor variability", xlab="Anchorindex", ylab="Median RSD (%)", scales=list(x=list(cex=.48)), auto.key=list(space="top", text=c("CC to anchor", "LE to anchor", "RE to anchor"), points=F, lines=T, columns=2, cex=.7, padding.text=2), data=parse[[3]])
+p1_c<-xyplot((parse_c*100)~as.factor(anchorindex), groups=edge, type="o", pch=20, cex=0.4, lwd=.5, main="Change in stability pattern as a function of anchor variability", xlab="Anchorindex", ylab="Median RSD (%)", scales=list(x=list(cex=.48)), auto.key=list(space="top", text=c("CC to anchor", "LE to anchor", "RE to anchor"), points=F, lines=T, columns=2, cex=.7, padding.text=2), data=parse[[3]])
+p2_s<-xyplot(parse_s~as.factor(anchorindex), type="o", pch=20, lty=1, cex=0.5, main="Best-fitting variability level", xlab="Anchorindex", ylab="Hits (F-Statistics > 98.503)", scales=list(x=list(cex=.48)), data=parse[[4]])
+p2_c<-xyplot(parse_c~as.factor(anchorindex), type="o", pch=20, lty=1, cex=0.5, main="Best-fitting variability level", xlab="Anchorindex", ylab="Hits (F-Statistics > 98.503)", scales=list(x=list(cex=.48)), data=parse[[4]])
+rs.3<-xyplot(rs_median~as.factor(anchorindex), panel=function(...) {
+        panel.xyplot(...)
+        panel.abline(h = 1.0, lty = 2, lwd = .5) },
+    type="o", pch=20, cex=0.4, lwd=.5, subset=parse=="simp", main="Median R-squared values across all simulations", xlab="Anchorindex", ylab=expression("Median"~R^2), scales=list(x=list(cex=.48)), data=parse[[5]][[2]])
+rs.4<-xyplot(rs_median~as.factor(anchorindex), panel=function(...) {
+        panel.xyplot(...)
+        panel.abline(h = 1.0, lty = 2, lwd = .5) },
+    type="o", pch=20, cex=0.4, lwd=.5, subset=parse=="comp", main="Median R-squared values across all simulations", xlab="Anchorindex", ylab=expression("Median"~R^2), scales=list(x=list(cex=.48)), data=parse[[5]][[2]])
+
+pdf(file.path("${html_file.files_path}", "simplex_plots.pdf"))
+p1_s
+p2_s
+rs.3
+invisible(dev.off())
+
+pdf(file.path("${html_file.files_path}", "complex_plots.pdf"))
+p1_c
+p2_c
+rs.4
+invisible(dev.off())
+
+    </configfile>
+  </configfiles>
+
+  <help>
+<![CDATA[
+
+ParseEval is a function for assessing certain aspects of articulatory data in phonetic research. It is intended to be used to evaluate the fit between measurements taken from experimental data and measurements taken from simulated data generated under different syllable parses.
+
+]]>
+  </help>
+
+  <citations>
+    <citation type="bibtex">
+@inproceedings{Shaw2010,
+  abstract = {This paper develops computational tools for evaluating competing syllabic parses of a phonological string on the basis of temporal patterns in speech production data. This is done by constructing models linking syllable parses to patterns of coordination between articulatory events. Data simulated from different syllabic parses are evaluated against experimental data from American English and Moroccan Arabic, two languages claimed to parse similar strings of segments into different syllabic structures. Results implicate a tautosyllabic parse of initial consonant clusters in English and a heterosyllabic parse of initial clusters in Arabic, in accordance with theoretical work on the syllable structure of these languages. It is further demonstrated that the model can correctly diagnose syllable structure even when previously proposed phonetic heuristics for such structure do not clearly point to the correct diagnosis.},
+  author = {Shaw, Jason A. and Gafos, Adamantios I.},
+  booktitle = {Proceedings of the 11th Meeting of the ACL-SIGMORPHON},
+  number = {July},
+  pages = {54--62},
+  publisher = {Association for Computational Linguistics},
+  title = {{Quantitative evaluation of competing syllable parses}},
+  year = {2010}
+}
+    </citation>
+  </citations>
+
+</tool>
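Note: when Galaxy runs the tool, the configfile block above is written to a temporary script file with every ${...} placeholder replaced by the user's input value; that file becomes $script_file on the command line. As a sketch only (all numbers below are hypothetical, not tool defaults), the core call in a rendered script would look like:

    parse <- parseEval(c1=c(50, 10), c2=c(45, 8), c3=c(40, 9),
                       cipi12=c(25, 6), cipi23=c(30, 7),
                       voweld=180, anchor=15, var_anchor=3,
                       types=3, words=12, simN=1000,
                       RE_rsd=0.05, CC_rsd=0.03, LE_rsd=0.08)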
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/parseval.R	Sat Mar 11 21:35:38 2017 -0500
@@ -0,0 +1,444 @@
+parseEval <- function (c1, c2, c3, cipi12, cipi23, voweld, anchor=15, var_anchor=3, types, words=12, simN=1000, RE_rsd, CC_rsd, LE_rsd) {
+  # basic validity check of the input parameters
+  c1<-as.numeric(c1)
+  c2<-as.numeric(c2)
+  c3<-as.numeric(c3)
+  cipi12<-as.numeric(cipi12)
+  cipi23<-as.numeric(cipi23)
+  voweld<-as.numeric(voweld)
+  anchor<-as.integer(anchor)
+  var_anchor<-as.integer(var_anchor)
+  types<-as.integer(types)
+  words<-as.integer(words)
+  simN<-as.integer(simN)
+  RE_rsd<-as.numeric(RE_rsd)
+  CC_rsd<-as.numeric(CC_rsd)
+  LE_rsd<-as.numeric(LE_rsd)
+  # phonetic parameters
+  C1p<-c3[1]     # plateau duration of the first consonant in triconsonantal clusters
+  C1stdv<-c3[2]  # standard deviation of the first consonant in triconsonantal clusters
+  C2p<-c2[1]     # plateau duration of the second consonant in triconsonantal clusters and the first consonant in biconsonantal clusters
+  C2stdv<-c2[2]  # standard deviation of the second consonant in triconsonantal clusters and the first consonant in biconsonantal clusters
+  C3p<-c1[1]     # plateau duration of the immediately prevocalic consonant
+  C3stdv<-c1[2]  # standard deviation of the immediately prevocalic consonant
+  C12ipi<-cipi12[1]   # duration of the interval between the plateaus of the first two consonants in triconsonantal clusters
+  C12stdv<-cipi12[2]  # standard deviation of the interval between the plateaus of the first two consonants in triconsonantal clusters
+  C23ipi<-cipi23[1]   # duration of the interval between the plateaus of the first two consonants in biconsonantal clusters and between the second two consonants in triconsonantal clusters
+  C23stdv<-cipi23[2]  # standard deviation of that interval
+  vowel_duration<-voweld              # duration of the vowel
+  variability_range<-anchor           # number of stepwise increases in variability simulated by the model
+  variability_resolution<-var_anchor  # size of each stepwise increase in variability
+  words_per_word_type<-words          # total number of words (stimuli) across all word types, aka "little n"
+  word_types<-types                   # number of different word types, e.g. #CV-, #CCV-, #CCCV-
+  data_RSD<-c(RE_rsd, LE_rsd, CC_rsd) # lump the RSD measures into a single array
+  # create anchor matrices for later use
+  A_simp <- matrix(nrow=variability_range, ncol=words_per_word_type)
+  A_comp <- matrix(nrow=variability_range, ncol=words_per_word_type)
+  # create matrices to hold the SD values
+  LE_SD_simp<-matrix(nrow=simN, ncol=variability_range, byrow=TRUE)
+  LE_SD_comp<-matrix(nrow=simN, ncol=variability_range, byrow=TRUE)
+  RE_SD_simp<-matrix(nrow=simN, ncol=variability_range, byrow=TRUE)
+  RE_SD_comp<-matrix(nrow=simN, ncol=variability_range, byrow=TRUE)
+  CC_SD_simp<-matrix(nrow=simN, ncol=variability_range, byrow=TRUE)
+  CC_SD_comp<-matrix(nrow=simN, ncol=variability_range, byrow=TRUE)
+  # create matrices to hold the RSD values
+  LE_RSD_simp<-matrix(nrow=simN, ncol=variability_range, byrow=TRUE)
+  LE_RSD_comp<-matrix(nrow=simN, ncol=variability_range, byrow=TRUE)
+  RE_RSD_simp<-matrix(nrow=simN, ncol=variability_range, byrow=TRUE)
+  RE_RSD_comp<-matrix(nrow=simN, ncol=variability_range, byrow=TRUE)
+  CC_RSD_simp<-matrix(nrow=simN, ncol=variability_range, byrow=TRUE)
+  CC_RSD_comp<-matrix(nrow=simN, ncol=variability_range, byrow=TRUE)
+  if (word_types==3) {
+    tepa<-c("Testing Triads")
+    # print(c("Simulating Data (Triads)"), quote=F)
+    # pb <- txtProgressBar(min = 0, max = simN, style = 3)
+    for (count in 1:simN) {
+      # setTxtProgressBar(pb, count)
+      # generate CCC tokens
+      # generate timestamps for C3 (the prevocalic consonant)
+      # generate general error-term for C3
+      e <- C3stdv*(rnorm(1))
+      # generate R(ight plateau edge = Release) of the prevocalic consonant:
+      # words_per_word_type/word_types Gaussian distributed numbers (for CCC tokens only)
+      # with mean 500, variance 20
+      CCCR3 <- rnorm(words_per_word_type/word_types, mean=500, sd=sqrt(20))
+      # generate L(eft plateau edge = Target) of the prevocalic consonant
+      CCCL3 <- CCCR3 - C3p + e  # generate L3 corresponding to R3 by assuming a plateau duration of C3p
+      # calculate midpoint of the prevocalic consonant plateau
+      CCCM3 <- ((CCCR3 + CCCL3) / 2)
+      # generate timestamps for C2
+      # generate general error-terms for C2
+      e1 <- C23stdv * (rnorm(1))  # normally distributed random error, mean=0, sd=1
+      e2 <- C2stdv * (rnorm(1))   # normally distributed random error, mean=0, sd=1
+      # generate right edge of C2
+      CCCR2 <- CCCL3 - C23ipi + e1  # from left edge of C3, assuming an ipi of C23ipi
+      # generate left edge of C2
+      CCCL2 <- CCCR2 - C2p + e2  # from right edge, assuming a plateau duration of C2p
+      # calculate midpoint of C2
+      CCCM2 <- ((CCCR2+CCCL2)/2)
+      # generate timestamps for C1
+      # generate general error-terms for C1
+      e1 <- C12stdv * (rnorm(1))  # normally distributed random error
+      e2 <- C1stdv * (rnorm(1))
+      # generate right edge of C1
+      CCCR1 <- CCCL2 - C12ipi + e1  # from left edge of C2, assuming an ipi of C12ipi
+      # generate L(eft plateau edge = Target) of C1
+      CCCL1 <- CCCR1 - C1p + e2  # generate L1 corresponding to R1 by assuming a plateau duration of C1p
+      # calculate midpoint of C1
+      CCCM1 <- ((CCCR1 + CCCL1)/2)
+      # generate CC tokens
+      # generate timestamps for C3 (prevocalic consonant)
+      # generate general error-term for C3
+      e <- C3stdv * (rnorm(1))  # normally distributed random error
+      # generate R(ight plateau edge = Release) of the prevocalic consonant
+      CCR3 <- rnorm(words_per_word_type/word_types, mean=500, sd=sqrt(20))  # N Gaussian distributed numbers with mean 500, variance 20
+      # generate L(eft plateau edge = Target) of the prevocalic consonant
+      CCL3 <- CCR3 - C3p + e  # generate L3 corresponding to R3 by assuming a plateau duration of C3p
+      # calculate midpoint of the prevocalic consonant plateau
+      CCM3 <- ((CCR3 + CCL3) / 2)
+      # generate timestamps for C2
+      # generate general error-terms for C2
+      e1 <- C23stdv * (rnorm(1))
+      e2 <- C2stdv * (rnorm(1))
+      # generate right edge of C2
+      CCR2 <- CCL3 - C23ipi + e1  # from left edge of C3, assuming an ipi of C23ipi
+      # generate left edge of C2
+      CCL2 <- CCR2 - C2p + e2  # from right edge, assuming a plateau duration of C2p
+      # calculate midpoint of C2
+      CCM2 <- ((CCR2 + CCL2) / 2)
+      # generate C tokens
+      # generate timestamps for C3 (the prevocalic consonant)
+      # generate general error-term for C3
+      e <- C3stdv * (rnorm(1))
+      # generate R(ight plateau edge = Release) of the prevocalic consonant
+      CR3 <- rnorm(words_per_word_type/word_types, mean=500, sd=sqrt(20))  # N Gaussian distributed numbers with mean 500, variance 20
+      # generate L(eft plateau edge = Target) of the prevocalic consonant
+      CL3 <- CR3 - C3p + e  # generate L3 corresponding to R3 by assuming a plateau duration of C3p
+      # calculate midpoint of the prevocalic consonant plateau
+      CM3 <- ((CR3 + CL3) / 2)
+      # generate timestamps for CCglobal
+      CCglobal <- apply(cbind(CCCM1, CCCM2, CCCM3), 1, mean)              # CCC clusters: mean of consonant plateau midpoints
+      CCglobal <- append(CCglobal, apply(cbind(CCM2, CCM3), 1, mean))     # CC clusters: mean of consonant plateau midpoints
+      CCglobal <- append(CCglobal, CM3)                                   # C clusters
+      # populate a single array with the midpoint of the prevocalic
+      # consonant of every word type; this array will be used to generate anchors
+      Global_CM3 <- CCCM3                      # for CCC clusters
+      Global_CM3 <- append(Global_CM3, CCM3)   # for CC clusters
+      Global_CM3 <- append(Global_CM3, CM3)    # for C clusters
+      # populate a single array with the left edge of the consonant cluster for every token;
+      # this array will be used to calculate SD and RSD for edge-to-anchor intervals
+      Global_CL1 <- CCCL1                      # left edges of tri-consonantal tokens (first third of Global_CL1)
+      Global_CL1 <- append(Global_CL1, CCL2)   # left edges of bi-consonantal tokens (second third of Global_CL1)
+      Global_CL1 <- append(Global_CL1, CL3)    # left edges of mono-consonantal tokens (last third of Global_CL1)
+      # populate a single array with the right edge of the consonant cluster for every token;
+      # this array is used to calculate SD and RSD for edge-to-anchor intervals
+      Global_CR3 <- CCCR3                      # for CCC clusters
+      Global_CR3 <- append(Global_CR3, CCR3)   # for CC clusters
+      Global_CR3 <- append(Global_CR3, CR3)    # for C clusters
+      # generate a series of anchor points increasing in variability and/or
+      # distance from the prevocalic consonant; the anchor array has one row
+      # for each anchor and one column for each token
+
+      # loop produces data/anchor for each token based on the Simplex Hypothesis
+      stdv <- 0   # reset the value of the anchor stdev to zero
+      Ae <- NULL  # reset the anchor error-term
+      for (cycle in 1:variability_range) {  # creates multiple anchor points for each token
+        for (m in 1:words_per_word_type) {  # creates an anchor point for each token from the right edge of the token
+          Ae<-stdv*(rnorm(n=1))  # normally distributed random error, assuming a mean of 0
+          A_simp[cycle, m] <- Global_CM3[m] + vowel_duration + Ae  # generate anchor A according to the simplex onset hypothesis
+        }
+        stdv<-stdv+variability_resolution  # next anchor variability step
+      }
+      # loop produces data/anchor for each token based on the Complex Hypothesis
+      stdv <- 0   # reset the value of the anchor stdev to zero
+      Ae <- NULL  # reset the anchor error-term
+      for (cycle in 1:variability_range) {  # creates multiple anchor points for each token
+        for (m in 1:words_per_word_type) {  # creates an anchor point for each token from the right edge of the token
+          Ae<-stdv*(rnorm(1))  # normally distributed random error, assuming a mean of 0
+          A_comp[cycle, m]<-CCglobal[m]+vowel_duration+Ae  # generate anchor A according to the complex onset hypothesis
+        }
+        stdv<-stdv+variability_resolution  # next anchor variability step
+      }
+      # Note about consonantal landmarks: they are replaced with each cycle of the
+      # simulation; in contrast, RSD values for each landmark are stored across simulations.
+      # helper functions for the SD values
+      x <- function(x) {sd(x-Global_CL1)}
+      y <- function(y) {sd(y-Global_CR3)}
+      z <- function(z) {sd(z-CCglobal)}
+      # compute the SD values
+      LE_SD_simp[count,] <- apply(A_simp, 1, x)
+      LE_SD_comp[count,] <- apply(A_comp, 1, x)
+      RE_SD_simp[count,] <- apply(A_simp, 1, y)
+      RE_SD_comp[count,] <- apply(A_comp, 1, y)
+      CC_SD_simp[count,] <- apply(A_simp, 1, z)
+      CC_SD_comp[count,] <- apply(A_comp, 1, z)
+      # compute the RSD values
+      LE_RSD_simp[count,] <- (apply(A_simp, 1, x))/((apply(A_simp, 1, mean))-mean(Global_CL1))
+      LE_RSD_comp[count,] <- (apply(A_comp, 1, x))/((apply(A_comp, 1, mean))-mean(Global_CL1))
+      RE_RSD_simp[count,] <- (apply(A_simp, 1, y))/((apply(A_simp, 1, mean))-mean(Global_CR3))
+      RE_RSD_comp[count,] <- (apply(A_comp, 1, y))/((apply(A_comp, 1, mean))-mean(Global_CR3))
+      CC_RSD_simp[count,] <- (apply(A_simp, 1, z))/(apply(A_simp, 1, mean)-mean(CCglobal))
+      CC_RSD_comp[count,] <- (apply(A_comp, 1, z))/(apply(A_comp, 1, mean)-mean(CCglobal))
+    }
+    # close(pb)
+  }
+  if (word_types==2) {
+    tepa<-c("Testing Dyads")
+    # print(c("Simulating Data (Dyads)"), quote=F)
+    # pb <- txtProgressBar(min = 0, max = simN, style = 3)
+    for (count in 1:simN) {
+      # setTxtProgressBar(pb, count)
+      # generate CCC tokens
+      # generate timestamps for C3 (the prevocalic consonant)
+      # generate general error-term for C3
+      e <- C3stdv * (rnorm(1))
+      # generate R(ight plateau edge = Release) of the prevocalic consonant:
+      # words_per_word_type/word_types Gaussian distributed numbers (for CCC tokens only)
+      # with mean 500, variance 20
+      CCCR3 <- rnorm(words_per_word_type/word_types, mean=500, sd=sqrt(20))
+      # generate L(eft plateau edge = Target) of the prevocalic consonant
+      CCCL3 <- abs(CCCR3 - C3p + e)  # generate L3 corresponding to R3 by assuming a plateau duration of C3p
+      # calculate midpoint of the prevocalic consonant plateau
+      CCCM3 <- abs((CCCR3 + CCCL3) / 2)
+      # generate timestamps for C2
+      # generate general error-terms for C2
+      e1 <- C23stdv * (rnorm(1))  # normally distributed random error
+      e2 <- C2stdv * (rnorm(1))   # normally distributed random error
+      # generate right edge of C2
+      CCCR2 <- abs(CCCL3 - C23ipi + e1)  # from left edge of C3, assuming an ipi of C23ipi
+      # generate left edge of C2
+      CCCL2 <- abs(CCCR2 - C2p + e2)  # from right edge, assuming a plateau duration of C2p
+      # calculate midpoint of C2
+      CCCM2 <- abs((CCCR2 + CCCL2) / 2)
+      # generate timestamps for C1
+      # generate general error-terms for C1
+      e1 <- C12stdv * (rnorm(1))  # normally distributed random error
+      e2 <- C1stdv * (rnorm(1))
+      # generate right edge of C1
+      CCCR1 <- abs(CCCL2 - C12ipi + e1)  # from left edge of C2, assuming an ipi of C12ipi
+      # generate L(eft plateau edge = Target) of C1
+      CCCL1 <- abs(CCCR1 - C1p + e2)  # generate L1 corresponding to R1 by assuming a plateau duration of C1p
+      # calculate midpoint of C1
+      CCCM1 <- abs((CCCR1 + CCCL1) / 2)
+      # generate CC tokens
+      # generate timestamps for C3 (prevocalic consonant)
+      # generate general error-term for C3
+      e <- C3stdv * (rnorm(1))  # normally distributed random error, 0 mean
+      # generate R(ight plateau edge = Release) of the prevocalic consonant
+      CCR3 <- rnorm(words_per_word_type/word_types, mean=500, sd=sqrt(20))  # N Gaussian distributed numbers with mean 500, variance 20
+      # generate L(eft plateau edge = Target) of the prevocalic consonant
+      CCL3 <- abs(CCR3 - C3p + e)  # generate L3 corresponding to R3 by assuming a plateau duration of C3p
+      # calculate midpoint of the prevocalic consonant plateau
+      CCM3 <- abs((CCR3 + CCL3) / 2)
+      # generate timestamps for C2
+      # generate general error-terms for C2
+      e1 <- C23stdv * (rnorm(1))
+      e2 <- C2stdv * (rnorm(1))
+      # generate right edge of C2
+      CCR2 <- abs(CCL3 - C23ipi + e1)  # from left edge of C3, assuming an ipi of C23ipi
+      # generate left edge of C2
+      CCL2 <- abs(CCR2 - C2p + e2)  # from right edge, assuming a plateau duration of C2p
+      # calculate midpoint of C2
+      CCM2 <- abs((CCR2 + CCL2) / 2)
+      # generate timestamps for CCglobal
+      CCglobal <- apply(cbind(CCCM1, CCCM2, CCCM3), 1, mean)           # CCC clusters: mean of consonant plateau midpoints
+      CCglobal <- append(CCglobal, apply(cbind(CCM2, CCM3), 1, mean))  # CC clusters: mean of consonant plateau midpoints
+      # populate a single array with the midpoint of the prevocalic
+      # consonant of every word type; this array will be used to generate anchors
+      Global_CM3 <- CCCM3                                          # for CCC clusters
+      Global_CM3 <- append(Global_CM3, CCM3, after=length(CCCM3))  # for CC clusters
+      # populate a single array with the left edge of the consonant cluster for every token;
+      # this array will be used to calculate SD and RSD for edge-to-anchor intervals
+      Global_CL1 <- CCCL1                                          # left edges of tri-consonantal tokens (first half of Global_CL1)
+      Global_CL1 <- append(Global_CL1, CCL2, after=length(CCCL1))  # left edges of bi-consonantal tokens (second half of Global_CL1)
+      # populate a single array with the right edge of the consonant cluster for every token;
+      # this array is used to calculate SD and RSD for edge-to-anchor intervals
+      Global_CR3 <- CCCR3                                          # for CCC clusters
+      Global_CR3 <- append(Global_CR3, CCR3, after=length(CCCR3))  # for CC clusters
+      # generate a series of anchor points increasing
+      # in variability and/or distance from the prevocalic consonant
+      stdv <- 0
+      Ae <- NULL
+      for (cycle in 1:variability_range) {  # creates multiple anchor points for each token
+        for (m in 1:words_per_word_type) {  # creates an anchor point for each token from the right edge of the token
+          Ae<-stdv*(rnorm(n=1))  # normally distributed random error, assuming a mean of 0
+          A_simp[cycle, m]<-Global_CM3[m] + vowel_duration + Ae  # generate anchor A according to the simplex onset hypothesis
+        }
+        stdv<-stdv+variability_resolution
+      }
+      # loop produces an anchor for each token based on the Complex Hypothesis
+      stdv <- 0
+      Ae <- NULL
+      for (cycle in 1:variability_range) {  # creates multiple anchor points for each token
+        for (m in 1:words_per_word_type) {  # creates an anchor point for each token from the right edge of the token
+          Ae<-stdv*(rnorm(1))  # normally distributed random error, assuming a mean of 0
+          A_comp[cycle, m]<-CCglobal[m]+vowel_duration+Ae  # generate anchor A according to the complex onset hypothesis
+        }
+        stdv<-stdv+variability_resolution  # next anchor variability step
+      }
+      # helper functions for the SD values
+      x <- function(x) {sd(x-Global_CL1)}
+      y <- function(y) {sd(y-Global_CR3)}
+      z <- function(z) {sd(z-CCglobal)}
+      # compute the SD values
+      LE_SD_simp[count,] <- abs(apply(A_simp, 1, x))
+      LE_SD_comp[count,] <- abs(apply(A_comp, 1, x))
+      RE_SD_simp[count,] <- abs(apply(A_simp, 1, y))
+      RE_SD_comp[count,] <- abs(apply(A_comp, 1, y))
+      CC_SD_simp[count,] <- abs(apply(A_simp, 1, z))
+      CC_SD_comp[count,] <- abs(apply(A_comp, 1, z))
+      # compute the RSD values
+      LE_RSD_simp[count,] <- abs((apply(A_simp, 1, x))/((apply(A_simp, 1, mean))-mean(Global_CL1)))
+      LE_RSD_comp[count,] <- abs((apply(A_comp, 1, x))/((apply(A_comp, 1, mean))-mean(Global_CL1)))
+      RE_RSD_simp[count,] <- abs((apply(A_simp, 1, y))/((apply(A_simp, 1, mean))-mean(Global_CR3)))
+      RE_RSD_comp[count,] <- abs((apply(A_comp, 1, y))/((apply(A_comp, 1, mean))-mean(Global_CR3)))
+      CC_RSD_simp[count,] <- abs((apply(A_simp, 1, z))/(apply(A_simp, 1, mean)-mean(CCglobal)))
+      CC_RSD_comp[count,] <- abs((apply(A_comp, 1, z))/(apply(A_comp, 1, mean)-mean(CCglobal)))
+    }
+  }
+  # close(pb)
+  # pb <- txtProgressBar(min = 1, max = variability_range, style = 3)
+  # assorted variables for diagnostics / plotting
+  aip_1<-rep(c(1:variability_range), 3)
+  edgep_1<-rep(c("LE_RSD", "RE_RSD", "CC_RSD"), each=variability_range)
+  LE_RSD_simp_median<-apply(apply(LE_RSD_simp, 2, sort), 2, median)
+  RE_RSD_simp_median<-apply(apply(RE_RSD_simp, 2, sort), 2, median)
+  CC_RSD_simp_median<-apply(apply(CC_RSD_simp, 2, sort), 2, median)
+  LE_RSD_comp_median<-apply(apply(LE_RSD_comp, 2, sort), 2, median)
+  RE_RSD_comp_median<-apply(apply(RE_RSD_comp, 2, sort), 2, median)
+  CC_RSD_comp_median<-apply(apply(CC_RSD_comp, 2, sort), 2, median)
+  simp<-c(LE_RSD_simp_median, RE_RSD_simp_median, CC_RSD_simp_median)
+  comp<-c(LE_RSD_comp_median, RE_RSD_comp_median, CC_RSD_comp_median)
+  RE_RSD_median<-c(RE_RSD_simp_median, RE_RSD_comp_median)
+  CC_RSD_median<-c(CC_RSD_simp_median, CC_RSD_comp_median)
+  # median RSDs across simulations as a function of anchorindex
+  plot.1<-data.frame(anchorindex=aip_1, edge=edgep_1, parse_s=simp, parse_c=comp)
+  # aggregate the data for goodness-of-fit evaluation
+  RE_RSD_simp<-t(RE_RSD_simp)
+  LE_RSD_simp<-t(LE_RSD_simp)
+  CC_RSD_simp<-t(CC_RSD_simp)
+  RE_RSD_comp<-t(RE_RSD_comp)
+  LE_RSD_comp<-t(LE_RSD_comp)
+  CC_RSD_comp<-t(CC_RSD_comp)
+  # loop through the data to get the goodness-of-fit results
+  data_simp<-matrix(ncol=4)
+  data_comp<-matrix(ncol=4)
+  sigfit<-function(x) {
+    if(x > 98.503) {
+      SigFit<-1
+    } else {
+      SigFit<-0
+    }
+  }
+  # analyse the data, simplex parse
+  # print(c("Analysing... simple parse"), quote=F)
+  for (i in 1 : variability_range) {
+    # setTxtProgressBar(pb, i)
+    sim_RSD<-cbind(RE_RSD_simp[i,], LE_RSD_simp[i,], CC_RSD_simp[i,])
+    temp<-apply(sim_RSD, 1, function(x) (lm(data_RSD ~ x)))
+    # organise the data for final analyses
+    # create anchor index
+    anchor_idx<-rep(i, times=simN)
+    # extract F-statistics
+    fstat<-unlist(lapply(temp, function(x) summary(x)$fstatistic[1]))
+    # extract R-squared values
+    rsquared<-unlist(lapply(temp, function(x) summary(x)$r.squared))
+    # check for SigFit
+    sgf<-sapply(fstat, sigfit)
+    # aggregate the data
+    agg_mat<-matrix(data=c(anchor_idx, fstat, rsquared, sgf), nrow=length(anchor_idx), ncol=4, dimnames=list(c(NULL), c("Anchorindex", "Fratio", "Rsquared", "SigFit")))
+    # add sgf to the existing data
+    data_simp<-rbind(data_simp, agg_mat)
+  }
+  outp_sp<-temp
+  data_simp<-data_simp[complete.cases(data_simp),]
+  data_simp<-as.data.frame(data_simp)
+  data_simp$Anchorindex<-as.factor(data_simp$Anchorindex)
+  output_simp<-tapply(data_simp$SigFit, data_simp$Anchorindex, sum)
+  # analyse the data, complex parse
+  # print(c("Analysing... complex parse"), quote=F)
+  for (i in 1 : variability_range) {
+    # setTxtProgressBar(pb, i)
+    sim_RSD<-cbind(RE_RSD_comp[i,], LE_RSD_comp[i,], CC_RSD_comp[i,])
+    temp<-apply(sim_RSD, 1, function(x) (lm(data_RSD ~ x)))
+    # organise the data for final analyses
+    anchor_idx<-rep(i, times=simN)
+    # extract F-statistics
+    fstat<-unlist(lapply(temp, function(x) summary(x)$fstatistic[1]))
+    # extract R-squared values
+    rsquared<-unlist(lapply(temp, function(x) summary(x)$r.squared))
+    # check for SigFit
+    sgf<-sapply(fstat, sigfit)
+    # aggregate the data
+    agg_mat<-matrix(data=c(anchor_idx, fstat, rsquared, sgf), nrow=length(anchor_idx), ncol=4, dimnames=list(c(NULL), c("Anchorindex", "Fratio", "Rsquared", "SigFit")))
+    # add sgf to the existing data
+    data_comp<-rbind(data_comp, agg_mat)
+  }
+  outp_cp<-temp
+  data_comp<-data_comp[complete.cases(data_comp),]
+  data_comp<-as.data.frame(data_comp)
+  data_comp$Anchorindex<-as.factor(data_comp$Anchorindex)
+  output_comp<-tapply(data_comp$SigFit, data_comp$Anchorindex, sum)
+  # diagnostic plot 2
+  output_plot.2<-cbind(output_simp, output_comp)
+  names(output_plot.2)<-NULL
+  colnames(output_plot.2)<-c("parse_s", "parse_c")
+  aip_2<-(1:variability_range)
+  plot.2<-data.frame(anchorindex=aip_2, output_plot.2, hitr_s=(output_simp/simN), hitr_c=(output_comp/simN))
+  # assess overall model quality: sum of hits per number of simulations
+  modq_s<-(sum(plot.2[,2]))/simN
+  modq_c<-(sum(plot.2[,3]))/simN
+  # assorted data for the third diagnostic plot
+  # sort by Rsquared (ascending), tie-breaker by Fratio (ascending)
+  data_simp_o<-data_simp[order(data_simp[,3], data_simp[,2]),]
+  data_comp_o<-data_comp[order(data_comp[,3], data_comp[,2]),]
+  aip_3<-rep(c(1:variability_range), 2)
+  parse.f<-rep(c("simp","comp"), each=variability_range)
+  # median
+  simp_rs_median<-tapply(data_simp_o$Rsquared, data_simp_o$Anchorindex, median)
+  comp_rs_median<-tapply(data_comp_o$Rsquared, data_comp_o$Anchorindex, median)
+  simp_fr_median<-tapply(data_simp_o$Fratio, data_simp_o$Anchorindex, median)
+  comp_fr_median<-tapply(data_comp_o$Fratio, data_comp_o$Anchorindex, median)
+  rs_median<-c(simp_rs_median, comp_rs_median)
+  fr_median<-c(simp_fr_median, comp_fr_median)
+  plot.3_median<-data.frame(anchorindex=aip_3, parse=parse.f, rs_median=rs_median, fr_median=fr_median)
+  # mean
+  simp_rs_mean<-tapply(data_simp_o$Rsquared, data_simp_o$Anchorindex, mean)
+  comp_rs_mean<-tapply(data_comp_o$Rsquared, data_comp_o$Anchorindex, mean)
+  simp_fr_mean<-tapply(data_simp_o$Fratio, data_simp_o$Anchorindex, mean)
+  comp_fr_mean<-tapply(data_comp_o$Fratio, data_comp_o$Anchorindex, mean)
+  rs_mean<-c(simp_rs_mean, comp_rs_mean)
+  fr_mean<-c(simp_fr_mean, comp_fr_mean)
+  plot.3_mean<-data.frame(anchorindex=aip_3, parse=parse.f, rs_mean=rs_mean, fr_mean=fr_mean)
+  # prepare for output
+  # close(pb)
+  output<-list("perf"=list("Performance Simple"=modq_s, "Performance Complex"=modq_c), "mode"=tepa, "Plot_1"=plot.1, "Plot_2"=plot.2, "Plot_3"=list("mean"=plot.3_mean, "median"=plot.3_median), "reg"=list("simplex_parse"=outp_sp, "complex_parse"=outp_cp), "sim_RSD"=list("simp"=data_simp, "comp"=data_comp))
+  cat("\n", "\n", "Overall Quality of Model Performance", "\t", "(", tepa, ")", "\n",
+      "(Ratio of:", "\t", "Total Number of Hits / Number of Simulations)", "\n",
+      "------------------------", "\n",
+      "Simple Modelling:", "\t", modq_s, "\t","\t","\t","\t", sum(plot.2[,2]), " / ", simN, "\n", "\n",
+      "Complex Modelling:", "\t", modq_c, "\t","\t","\t","\t", sum(plot.2[,3]), " / ", simN, "\n", "\n", sep="")
+  return(invisible(output))
+}
\ No newline at end of file
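For reference, a minimal sketch of calling parseEval directly in R outside Galaxy, reusing the hypothetical input values from the rendered-script example above; the element names below match the output list assembled at the end of the function:

    source("parseval.R")
    set.seed(2954)
    res <- parseEval(c1=c(50, 10), c2=c(45, 8), c3=c(40, 9),
                     cipi12=c(25, 6), cipi23=c(30, 7),
                     voweld=180, anchor=15, var_anchor=3,
                     types=3, words=12, simN=1000,
                     RE_rsd=0.05, CC_rsd=0.03, LE_rsd=0.08)
    res$mode          # "Testing Triads" or "Testing Dyads", depending on types
    res$perf          # overall hit ratios for the simple and complex parses
    head(res$Plot_1)  # median RSDs per anchor index (data behind p1_s/p1_c)
    head(res$Plot_2)  # hit counts and hit rates per anchor index (p2_s/p2_c)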
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/r_wrapper.sh	Sat Mar 11 21:35:38 2017 -0500
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+### Run R providing the R script in $1 as standard input and passing
+### the remaining arguments on the command line
+
+# Function that writes a message to stderr and exits
+fail()
+{
+    echo "$@" >&2
+    exit 1
+}
+
+# Ensure the R executable is found
+which R > /dev/null || fail "'R' is required by this tool but was not found on the path"
+
+# Extract the first argument
+infile=$1; shift
+
+# Ensure the file exists
+test -f "$infile" || fail "R input file '$infile' does not exist"
+
+# Invoke R, passing the file named by the first argument to stdin
+R --vanilla --slave --args "$@" < "$infile"
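Taken together: the wrapper feeds the rendered script to R on standard input and forwards the remaining command-line arguments after --args, so the final line above is equivalent to a manual invocation such as R --vanilla --slave --args /path/to/tool_dir < script.R (paths hypothetical). Inside the script, commandArgs(trailingOnly=TRUE)[1] then yields "/path/to/tool_dir", which is where parseval.R is sourced from.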