# HG changeset patch
# User deepakjadmin
# Date 1453354498 18000
# Node ID ee374e48024f9850884a058bd20d00ccfa9bf586
Uploaded
diff -r 000000000000 -r ee374e48024f tool1/csv2rdatatrain.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool1/csv2rdatatrain.R Thu Jan 21 00:34:58 2016 -0500
@@ -0,0 +1,29 @@
+args <- commandArgs(TRUE)
+####source("/home/galaxy/galaxy-dist/tools/mpdstools/tool1/csv2rdatatrain2.R")
+
+csv2rdatatrain <- function(arg1,arg2)
+{
+ file <- read.csv(arg1,row.names =1, header=T)
+ col <- ncol(file)
+ stopifnot(!is.null(file), col > 2)
+
+ #cat("the Outcome column is not a factor vector.\n",file=stderr())
+ stopifnot(is.factor(file[,col]))
+
+ if(levels(file[,col])[1] != ""){
+ dataX <- file[,1:(col-1)]
+ dataY <- file[,col]
+ stopifnot(nrow(dataX) == length(dataY))
+ save(dataX,dataY,file=arg2)
+ }
+ else{
+ cat("the Outcome column has less number of entry than number of compounds.please check input file.\n",file=stderr())
+ }
+ }
+
+csv2rdatatrain(args[1],args[2])
+
+
+
+
+
diff -r 000000000000 -r ee374e48024f tool1V2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool1V2.xml Thu Jan 21 00:34:58 2016 -0500
@@ -0,0 +1,38 @@
+
+
+This tool converts an input csv file into RData, which is then used for model building
+
+
+
+ R_ROOT_DIR
+ R
+
+ tool1/csv2rdatatrain.R $file1 $RData
+
+
+
+
+
+
+
+.. class:: infomark
+
+**Input "csv file" format must be as given below :**
+
+
+"",feature1,feaure2,feature3,..,activity
+
+
+cpd1,623,0.4,3.4,..,Active
+
+cpd2,234,0.9,5.6,..,Inactive
+
+cpd3,567,0.5,3.14,..,Active
+
+cpd4,231,0.1,1.2,..,Inactive
+
+here "cpd" stands for name or id of a compound. It is rowname with no column header.
+
+
+
+
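For reference, a minimal R sketch of what tool1/csv2rdatatrain.R does with a file in this format (the file names train.csv and train.RData are hypothetical):

    file  <- read.csv("train.csv", row.names = 1, header = TRUE)  # row names = compound ids
    col   <- ncol(file)
    dataX <- file[, 1:(col - 1)]              # numeric descriptors feature1..featureN
    dataY <- file[, col]                      # factor outcome, e.g. Active/Inactive
    save(dataX, dataY, file = "train.RData")  # RData consumed by the model-building tool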
diff -r 000000000000 -r ee374e48024f tool2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool2.xml Thu Jan 21 00:34:58 2016 -0500
@@ -0,0 +1,589 @@
+
+
+ Creates a script file from the user-given input to build a model
+
+
+ R_ROOT_DIR
+ R
+ caret-tools
+
+
+#if $OPTION11.PARAMETERS == "Advance"
+tool2/modelBuilding.py --method $METHOD $RDATA --getdatainfoeval $OPTION11.GETDATAINFOEVAL --getdatainfoecho $OPTION11.GETDATAINFOECHO --getdatainforesult $OPTION11.GETDATAINFORESULT --missingfiltereval $OPTION11.CON1.MISSINGFILTEREVAL --missingfilterecho $OPTION11.CON1.MISSINGFILTERECHO --missingfilterresult $OPTION11.CON1.MISSINGFILTERRESULT --missingfilterthreshc $OPTION11.CON1.MISSINGFILTERTHRESHC --missingfilterthreshr $OPTION11.CON1.MISSINGFILTERTHRESHR --pcaeval $OPTION11.PCAEVAL --pcaecho $OPTION11.PCAECHO --pcaresult $OPTION11.PCARESULT --pcacomp $OPTION11.PCACOMP --pcaploteval $OPTION11.PCAPLOTEVAL --pcaplotecho $OPTION11.PCAPLOTECHO --pcaplotresult $OPTION11.PCAPLOTRESULT --pcaplotfig $OPTION11.PCAPLOTFIG --initialdataspliteval $OPTION11.CON2.INITIALDATASPLITEVAL --initialdatasplitecho $OPTION11.CON2.INITIALDATASPLITECHO --initialdatasplitresult $OPTION11.CON2.INITIALDATASPLITRESULT --saampling $OPTION11.CON2.SAAMPLING --percent $OPTION11.CON2.PERCENT --nzveval $OPTION11.CON3.NZVEVAL --nzvresult $OPTION11.CON3.NZVRESULT --nzvecho $OPTION11.CON3.NZVECHO --corrfiltereval $OPTION11.CON4.CORRFILTEREVAL --corrfilterresult $OPTION11.CON4.CORRFILTERRESULT --corrfilterecho $OPTION11.CON4.CORRFILTERECHO --threshholdcor $OPTION11.CON4.THRESHHOLDCOR --preproceval $OPTION11.CON5.PREPROCEVAL --preprocecho $OPTION11.CON5.PREPROCECHO --preprocresult $OPTION11.CON5.PREPROCRESULT --setupworkersecho $OPTION11.SETUPWORKERSECHO --setupworkersresult $OPTION11.SETUPWORKERSRESULT --numworkers $OPTION11.NUMWORKERS --setupresamplingecho $OPTION11.CON6.SETUPRESAMPLINGECHO --setupresamplingresult $OPTION11.CON6.SETUPRESAMPLINGRESULT --resampname $OPTION11.CON6.RESAMPNAME --resamplenumber $OPTION11.CON6.RESAMPLENUMBER --numrepeat $OPTION11.CON6.NUMREPEAT --resamplenumberpercent $OPTION11.CON6.RESAMPLENUMBERPERCENT --setupgridresult $OPTION11.SETUPGRIDRESULT --setupgridecho $OPTION11.SETUPGRIDECHO --setupgridsize $OPTION11.SETUPGRIDSIZE --fitmodelresult $OPTION11.FITMODELRESULT --fitmodelecho $OPTION11.FITMODELECHO --fitmodeleval $OPTION11.FITMODELEVAL --modeldescrecho $OPTION11.MODELDESCRECHO --modeldescrresult $OPTION11.MODELDESCRRESULT --resamptableecho $OPTION11.RESAMPTABLEECHO --resamptableresult $OPTION11.RESAMPTABLERESULT --profileplotecho $OPTION11.PROFILEPLOTECHO --profileplotfig $OPTION11.PROFILEPLOTFIG --stopworkersecho $OPTION11.STOPWORKERSECHO --stopworkersresult $OPTION11.STOPWORKERSRESULT --testpredresult $OPTION11.TESTPREDRESULT --testpredecho $OPTION11.TESTPREDECHO --classprobstexresult $OPTION11.CLASSPROBSTEXRESULT --classprobstexecho $OPTION11.CLASSPROBSTEXECHO --classprobstexresult1 $OPTION11.CLASSPROBSTEXRESULT1 --classprobstexecho1 $OPTION11.CLASSPROBSTEXECHO1 --savedataecho $OPTION11.SAVEDATAECHO --savedataresult $OPTION11.SAVEDATARESULT --datasets $datasets --outputmodel $model --outputresultpdf $document;
+#end if
+#if $OPTION11.PARAMETERS == "basic"
+tool2/modelBuilding.py --method $METHOD $RDATA --getdatainfoeval $OPTION11.GETDATAINFOEVAL --getdatainfoecho $OPTION11.GETDATAINFOECHO --getdatainforesult $OPTION11.GETDATAINFORESULT --missingfiltereval $OPTION11.MISSINGFILTEREVAL --missingfilterecho $OPTION11.MISSINGFILTERECHO --missingfilterresult $OPTION11.MISSINGFILTERRESULT --missingfilterthreshc $OPTION11.MISSINGFILTERTHRESHC --missingfilterthreshr $OPTION11.MISSINGFILTERTHRESHR --pcaeval $OPTION11.PCAEVAL --pcaecho $OPTION11.PCAECHO --pcaresult $OPTION11.PCARESULT --pcacomp $OPTION11.PCACOMP --pcaploteval $OPTION11.PCAPLOTEVAL --pcaplotecho $OPTION11.PCAPLOTECHO --pcaplotresult $OPTION11.PCAPLOTRESULT --pcaplotfig $OPTION11.PCAPLOTFIG --initialdataspliteval $OPTION11.INITIALDATASPLITEVAL --initialdatasplitecho $OPTION11.INITIALDATASPLITECHO --initialdatasplitresult $OPTION11.INITIALDATASPLITRESULT --saampling $OPTION11.SAAMPLING --percent $OPTION11.PERCENT --nzveval $OPTION11.NZVEVAL --nzvresult $OPTION11.NZVRESULT --nzvecho $OPTION11.NZVECHO --corrfiltereval $OPTION11.CORRFILTEREVAL --corrfilterresult $OPTION11.CORRFILTERRESULT --corrfilterecho $OPTION11.CORRFILTERECHO --threshholdcor $OPTION11.THRESHHOLDCOR --preproceval $OPTION11.PREPROCEVAL --preprocecho $OPTION11.PREPROCECHO --preprocresult $OPTION11.PREPROCRESULT --setupworkersecho $OPTION11.SETUPWORKERSECHO --setupworkersresult $OPTION11.SETUPWORKERSRESULT --numworkers $OPTION11.NUMWORKERS --setupresamplingecho $OPTION11.SETUPRESAMPLINGECHO --setupresamplingresult $OPTION11.SETUPRESAMPLINGRESULT --resampname $OPTION11.RESAMPNAME --resamplenumber $OPTION11.RESAMPLENUMBER --numrepeat $OPTION11.NUMREPEAT --resamplenumberpercent $OPTION11.RESAMPLENUMBERPERCENT --setupgridresult $OPTION11.SETUPGRIDRESULT --setupgridecho $OPTION11.SETUPGRIDECHO --setupgridsize $OPTION11.SETUPGRIDSIZE --fitmodelresult $OPTION11.FITMODELRESULT --fitmodelecho $OPTION11.FITMODELECHO --fitmodeleval $OPTION11.FITMODELEVAL --modeldescrecho $OPTION11.MODELDESCRECHO --modeldescrresult $OPTION11.MODELDESCRRESULT --resamptableecho $OPTION11.RESAMPTABLEECHO --resamptableresult $OPTION11.RESAMPTABLERESULT --profileplotecho $OPTION11.PROFILEPLOTECHO --profileplotfig $OPTION11.PROFILEPLOTFIG --stopworkersecho $OPTION11.STOPWORKERSECHO --stopworkersresult $OPTION11.STOPWORKERSRESULT --testpredresult $OPTION11.TESTPREDRESULT --testpredecho $OPTION11.TESTPREDECHO --classprobstexresult $OPTION11.CLASSPROBSTEXRESULT --classprobstexecho $OPTION11.CLASSPROBSTEXECHO --classprobstexresult1 $OPTION11.CLASSPROBSTEXRESULT1 --classprobstexecho1 $OPTION11.CLASSPROBSTEXECHO1 --savedataecho $OPTION11.SAVEDATAECHO --savedataresult $OPTION11.SAVEDATARESULT --datasets $datasets --outputmodel $model --outputresultpdf $document;
+#end if
+
+.. class:: infomark
+
+
+
+**Instruction**
+
+----------
+
+Users may change any parameter according to their requirements. For routine use, the
+user only needs to provide the input csv file and the method for model building.
+
+More details are given in the user manual.
+
+
+
+
+
+
+
+
+
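For orientation, a condensed R sketch of the core caret workflow that the generated report performs with the default settings (pls method, 80/20 split, boot632 resampling with 10 repetitions, tuning grid of size 3). The file name train.RData is hypothetical; the actual template in tool2/templateLibrary.py adds missing-value, near-zero-variance and correlation filtering, pre-processing and full PDF reporting around these calls:

    library(caret)
    load("train.RData")                  # dataX, dataY produced by tool1
    set.seed(1)
    inTrain <- createDataPartition(dataY, p = 0.8, list = FALSE)
    ctl <- trainControl(method = "boot632", number = 10,
                        classProbs = TRUE, summaryFunction = twoClassSummary)
    fit <- train(dataX[inTrain, ], dataY[inTrain], method = "pls",
                 metric = "ROC", trControl = ctl, tuneLength = 3)
    confusionMatrix(predict(fit, dataX[-inTrain, ]), dataY[-inTrain])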
diff -r 000000000000 -r ee374e48024f tool2/.templateLibrary.py.swp.1
Binary file tool2/.templateLibrary.py.swp.1 has changed
diff -r 000000000000 -r ee374e48024f tool2/modelBuilding.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool2/modelBuilding.py Thu Jan 21 00:34:58 2016 -0500
@@ -0,0 +1,183 @@
+def __inputArguments():
+
+ import argparse
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--method", nargs='?',default ='pls',help="Name of the method on which model will build; \
+ Available Methods are:- pls, glm , glmboost ")
+ parser.add_argument("rdata",help="Descriptor file for model building")
+ parser.add_argument("--getdatainfoeval",nargs='?',default='TRUE',help="Validation of the data ")
+ parser.add_argument("--getdatainfoecho",nargs='?',default='FALSE',help="print on consol about Validity of the data ")
+ parser.add_argument("--getdatainforesult",nargs='?',default='hide',help="print in output file about Validity of the data ")
+ parser.add_argument("--missingfiltereval",nargs='?',default='FALSE',help="Processing step :: removal of missing value ")
+ parser.add_argument("--missingfilterecho",nargs='?',default='FALSE',help="print Processing step :: removal of missing value ")
+ parser.add_argument("--missingfilterresult",nargs='?',default='hide',help="print in output file about Processing step :: removal of missing value ")
+ parser.add_argument("--missingfilterthreshc",nargs='?',default=0.20,type=float,help="info about highly missing column data")
+ parser.add_argument("--missingfilterthreshr",nargs='?',default=0.20,type=float,help="info about highly missing row number")
+ parser.add_argument("--pcaeval",nargs='?',default='FALSE',help="PCA of data ")
+ parser.add_argument("--pcaecho",nargs='?',default='FALSE',help="PCA of data ")
+ parser.add_argument("--pcaresult",nargs='?',default='hide',help="print in file about PCA of data ")
+ parser.add_argument("--pcacomp",nargs='?',default=3,type=int,help="Number of PCA componant will be plotted ")
+ parser.add_argument("--pcaploteval",nargs='?',default='FALSE',help="PCA plot of data ")
+ parser.add_argument("--pcaplotecho",nargs='?',default='FALSE',help="print PCA plot of data ")
+ parser.add_argument("--pcaplotresult",nargs='?',default='hide',help="write in output file about PCA plot of data")
+ parser.add_argument("--pcaplotfig",nargs='?',default='TRUE',help="make figure file for integration in output file")
+ parser.add_argument("--initialdataspliteval",nargs='?',default='TRUE',help="data splitting in test and train set ")
+ parser.add_argument("--initialdatasplitecho",nargs='?',default='FALSE',help="print about data splitting in test and train set")
+ parser.add_argument("--initialdatasplitresult",nargs='?',default='hide',help="write in outputfile about data splitting in test and train set")
+ parser.add_argument("--saampling",nargs='?',default="garBage",help="Perform sampling from data")
+ parser.add_argument("--percent",nargs='?',default=0.8,type=float,help="percent value at which data splitting is done")
+ parser.add_argument("--nzveval",nargs='?',default='FALSE',help="remove near zero values")
+ parser.add_argument("--nzvresult",nargs='?',default='hide',help="write in outputfile about removing near zero values")
+ parser.add_argument("--nzvecho",nargs='?',default='FALSE',help="print about removing near zero values")
+ parser.add_argument("--corrfiltereval",nargs='?',default='FALSE',help="remove higly correlated values")
+ parser.add_argument("--corrfilterresult",nargs='?',default='hide',help="write in outputfile about removing highly correlated values")
+ parser.add_argument("--corrfilterecho",nargs='?',default='FALSE',help="print about removing correlated values")
+ parser.add_argument("--threshholdcor",nargs='?',default=0.75,type=float,help="percent value at which correlated values ommitted ")
+ parser.add_argument("--preproceval",nargs='?',default='FALSE',help="pre proccesing")
+ parser.add_argument("--preprocecho",nargs='?',default='FALSE',help="print about pre proccesing")
+ parser.add_argument("--preprocresult",nargs='?',default='hide',help="write in output file about pre proccesing")
+ parser.add_argument("--setupworkersecho",nargs='?',default='FALSE',help="print about number of processors")
+ parser.add_argument("--setupworkersresult",nargs='?',default='tex',help="write about number of processors in output file")
+ parser.add_argument("--numworkers",nargs='?',default=1,type=int,help="defines used processors")
+ parser.add_argument("--setupresamplingecho",nargs='?',default='FALSE',help="print resampling rules")
+ parser.add_argument("--setupresamplingresult",nargs='?',default='hide',help="write resampling rules in output file")
+ parser.add_argument("--resampname",nargs='?',default='boot632',help="choose type of resampling")
+ parser.add_argument("--resamplenumber",nargs='?',default=10,type=int,help="set number of resampling")
+ parser.add_argument("--numrepeat",nargs='?',default=3,type=int,help="set times of repeat")
+ parser.add_argument("--resamplenumberpercent",nargs='?',default=0.75,type=float,help="set PERCENT resampling")
+ parser.add_argument("--setupgridresult",nargs='?',default='hide',help="write about number of grids in output file")
+ parser.add_argument("--setupgridecho",nargs='?',default='FALSE',help="print about number of grids")
+ parser.add_argument("--setupgridsize",nargs='?',default=3,type=int,help="set number of grids")
+ parser.add_argument("--fitmodelresult",nargs='?',default='hide',help="write about model")
+ parser.add_argument("--fitmodelecho",nargs='?',default='FALSE',help="print about model")
+ parser.add_argument("--fitmodeleval",nargs='?',default='TRUE',help="start model building")
+ parser.add_argument("--modeldescrecho",nargs='?',default='FALSE',help="print model description")
+ parser.add_argument("--modeldescrresult",nargs='?',default='hide',help="write model description in outout file")
+ parser.add_argument("--resamptableecho",nargs='?',default='FALSE',help="print resample table")
+ parser.add_argument("--resamptableresult",nargs='?',default='tex',help="write resample table in output file")
+ parser.add_argument("--profileplotecho",nargs='?',default='FALSE',help="print about profile plots")
+ parser.add_argument("--profileplotfig",nargs='?',default='TRUE',help=" profile plots")
+ parser.add_argument("--stopworkersecho",nargs='?',default='FALSE',help="stop workers ie processors")
+ parser.add_argument("--stopworkersresult",nargs='?',default='hide',help="write about workers ie processors used")
+ parser.add_argument("--testpredresult",nargs='?',default='tex',help="write about statistical measure")
+ parser.add_argument("--testpredecho",nargs='?',default='FALSE',help="print about statistical measure")
+ parser.add_argument("--classprobstexresult",nargs='?',default='tex',help="paste various figure of statistical measure in outputfile")
+ parser.add_argument("--classprobstexecho",nargs='?',default='FALSE',help="print various figure of statistical measure")
+ parser.add_argument("--classprobstexresult1",nargs='?',default='hide',help="create roc curve in outputfile")
+ parser.add_argument("--classprobstexecho1",nargs='?',default='FALSE',help="print figure of statistical measure")
+ parser.add_argument("--savedataecho",nargs='?',default='FALSE',help="information about completion of model building ")
+ parser.add_argument("--savedataresult",nargs='?',default='hide',help="write information about completion of model building in outputfile ")
+ parser.add_argument("--datasets", help="name of the generated datasets")
+ parser.add_argument("--outputmodel", help="give name for the generated model")
+ parser.add_argument("--outputresultpdf", help="give name for the output pdf file")
+
+ args = parser.parse_args()
+ return args
+
+def generateRnwScript():
+
+ import templateLibrary
+ t = templateLibrary.__template4Rnw()
+
+ from string import Template
+ s = Template(t)
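+ # string.Template.safe_substitute() replaces only the UPPERCASE placeholders supplied below;
+ # unknown tokens (e.g. R's "$outcome") and escaped "$$" pass through to the Rnw file unchanged.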
+
+ args = __inputArguments()
+
+ templt = s.safe_substitute(METHOD=args.method,
+ RDATA=args.rdata,
+ GETDATAINFOEVAL=args.getdatainfoeval,
+ GETDATAINFOECHO=args.getdatainfoecho,
+ GETDATAINFORESULT=args.getdatainforesult,
+ MISSINGFILTEREVAL=args.missingfiltereval,
+ MISSINGFILTERECHO=args.missingfilterecho,
+ MISSINGFILTERRESULT=args.missingfilterresult,
+ MISSINGFILTERTHRESHC=args.missingfilterthreshc,
+ MISSINGFILTERTHRESHR=args.missingfilterthreshr,
+ PCAEVAL=args.pcaeval,
+ PCAECHO=args.pcaecho,
+ PCARESULT=args.pcaresult,
+ PCACOMP=args.pcacomp,
+ PCAPLOTEVAL=args.pcaploteval,
+ PCAPLOTECHO=args.pcaplotecho,
+ PCAPLOTRESULT=args.pcaplotresult,
+ PCAPLOTFIG=args.pcaplotfig,
+ INITIALDATASPLITEVAL=args.initialdataspliteval,
+ INITIALDATASPLITECHO=args.initialdatasplitecho,
+ INITIALDATASPLITRESULT=args.initialdatasplitresult,
+ SAAMPLING=args.saampling,
+ PERCENT=args.percent,
+ NZVEVAL=args.nzveval,
+ NZVRESULT=args.nzvresult,
+ NZVECHO=args.nzvecho,
+ CORRFILTEREVAL=args.corrfiltereval,
+ CORRFILTERRESULT=args.corrfilterresult,
+ CORRFILTERECHO=args.corrfilterecho,
+ THRESHHOLDCOR=args.threshholdcor,
+ PREPROCEVAL=args.preproceval,
+ PREPROCECHO=args.preprocecho,
+ PREPROCRESULT=args.preprocresult,
+ SETUPWORKERSECHO=args.setupworkersecho,
+ SETUPWORKERSRESULT=args.setupworkersresult,
+ NUMWORKERS=args.numworkers,
+ SETUPRESAMPLINGECHO=args.setupresamplingecho,
+ SETUPRESAMPLINGRESULT=args.setupresamplingresult,
+ RESAMPNAME=args.resampname,
+ RESAMPLENUMBER=args.resamplenumber,
+ NUMREPEAT=args.numrepeat,
+ RESAMPLENUMBERPERCENT=args.resamplenumberpercent,
+ SETUPGRIDRESULT=args.setupgridresult,
+ SETUPGRIDECHO=args.setupgridecho,
+ SETUPGRIDSIZE=args.setupgridsize,
+ FITMODELRESULT=args.fitmodelresult,
+ FITMODELECHO=args.fitmodelecho,
+ FITMODELEVAL=args.fitmodeleval,
+ MODELDESCRECHO=args.modeldescrecho,
+ MODELDESCRRESULT=args.modeldescrresult,
+ RESAMPTABLEECHO=args.resamptableecho,
+ RESAMPTABLERESULT=args.resamptableresult,
+ PROFILEPLOTECHO=args.profileplotecho,
+ PROFILEPLOTFIG=args.profileplotfig,
+ STOPWORKERSECHO=args.stopworkersecho,
+ STOPWORKERSRESULT=args.stopworkersresult,
+ TESTPREDRESULT=args.testpredresult,
+ TESTPREDECHO=args.testpredecho,
+ CLASSPROBSTEXRESULT=args.classprobstexresult,
+ CLASSPROBSTEXECHO=args.classprobstexecho,
+ CLASSPROBSTEXRESULT1=args.classprobstexresult1,
+ CLASSPROBSTEXECHO1=args.classprobstexecho1,
+ SAVEDATAECHO=args.savedataecho,
+ SAVEDATARESULT=args.savedataresult )
+
+
+ f = open('result-doc.Rnw','w')
+ f.write(templt)
+ f.close()
+
+def modelBuilding():
+
+ import os
+ os.system('R CMD Sweave result-doc.Rnw > cmd.log.1 2>&1')
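+ # pdflatex runs twice below so that cross-references (\pageref{LastPage}, table/figure labels) resolve.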
+ os.system('pdflatex result-doc.tex > cmd.log.2 2>&1')
+ os.system('pdflatex result-doc.tex > cmd.log.2 2>&1')
+# os.system('pdflatex result-doc.tex 2>&1 | tee cmd.log.2')
+ args = __inputArguments()
+
+ from string import Template
+ s1 = Template('cp $METHOD-Fit.RData $OUTPUTMODEL')
+ s2 = Template('cp result-doc.pdf $OUTPUTRESULTPDF')
+ s3 = Template('cp datasets.RData $DATASETS')
+
+ cmd1 = s1.safe_substitute(METHOD=args.method, OUTPUTMODEL=args.outputmodel)
+ cmd2 = s2.safe_substitute(OUTPUTRESULTPDF=args.outputresultpdf)
+ cmd3 = s3.safe_substitute(DATASETS=args.datasets)
+
+ os.system(cmd1)
+ os.system(cmd2)
+ os.system(cmd3)
+
+if __name__ == "__main__" :
+
+ generateRnwScript()
+ modelBuilding()
diff -r 000000000000 -r ee374e48024f tool2/templateLibrary.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool2/templateLibrary.py Thu Jan 21 00:34:58 2016 -0500
@@ -0,0 +1,1253 @@
+def __template4Rnw():
+
+ template4Rnw = r'''%% Classification Modeling Script
+%% Max Kuhn (max.kuhn@pfizer.com, mxkuhn@gmail.com)
+%% Version: 1.00
+%% Created on: 2010/10/02
+%%
+%% This is an Sweave template for building and describing
+%% classification models. It mixes R and LaTeX code. The document can
+%% be processing using R's Sweave function to produce a tex file.
+%%
+%% The inputs are:
+%% - the initial data set in a data frame called 'rawData'
+%% - a factor column in the data set called 'class'. this should be the
+%% outcome variable
+%% - all other columns in rawData should be predictor variables
+%% - the type of model should be in a variable called 'modName'.
+%%
+%% The script attempts to make some intelligent choices based on the
+%% model being used. For example, if modName is "pls", the script will
+%% automatically center and scale the predictor data. There are
+%% situations where these choices can (and should be) changed.
+%%
+%% There are other options that may make sense to change. For example,
+%% the user may want to adjust the type of resampling. To find these
+%% parts of the script, search on the string 'OPTION'. These parts of
+%% the code will document the options.
+
+\documentclass[14pt]{report}
+\usepackage{amsmath}
+\usepackage[pdftex]{graphicx}
+\usepackage{color}
+\usepackage{ctable}
+\usepackage{xspace}
+\usepackage{fancyvrb}
+\usepackage{fancyhdr}
+\usepackage{lastpage}
+\usepackage{longtable}
+\usepackage{algorithm2e}
+\usepackage[
+ colorlinks=true,
+ linkcolor=blue,
+ citecolor=blue,
+ urlcolor=blue]
+ {hyperref}
+\usepackage{lscape}
+\usepackage{Sweave}
+\SweaveOpts{keep.source = TRUE}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+% define new colors for use
+\definecolor{darkgreen}{rgb}{0,0.6,0}
+\definecolor{darkred}{rgb}{0.6,0.0,0}
+\definecolor{lightbrown}{rgb}{1,0.9,0.8}
+\definecolor{brown}{rgb}{0.6,0.3,0.3}
+\definecolor{darkblue}{rgb}{0,0,0.8}
+\definecolor{darkmagenta}{rgb}{0.5,0,0.5}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\newcommand{\bld}[1]{\mbox{\boldmath $$#1$$}}
+\newcommand{\shell}[1]{\mbox{$$#1$$}}
+\renewcommand{\vec}[1]{\mbox{\bf {#1}}}
+
+\newcommand{\ReallySmallSpacing}{\renewcommand{\baselinestretch}{.6}\Large\normalsize}
+\newcommand{\SmallSpacing}{\renewcommand{\baselinestretch}{1.1}\Large\normalsize}
+
+\newcommand{\halfs}{\frac{1}{2}}
+
+\setlength{\oddsidemargin}{-.25 truein}
+\setlength{\evensidemargin}{0truein}
+\setlength{\topmargin}{-0.2truein}
+\setlength{\textwidth}{7 truein}
+\setlength{\textheight}{8.5 truein}
+\setlength{\parindent}{0.20truein}
+\setlength{\parskip}{0.10truein}
+
+\setcounter{LTchunksize}{50}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\pagestyle{fancy}
+\lhead{}
+%% OPTION Report header name
+\chead{Classification Model Script}
+\rhead{}
+\lfoot{}
+\cfoot{}
+\rfoot{\thepage\ of \pageref{LastPage}}
+\renewcommand{\headrulewidth}{1pt}
+\renewcommand{\footrulewidth}{1pt}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% OPTION Report title and modeler name
+\title{Classification Model Script using $METHOD}
+\author{"Lynn Group with M. Kuhn, SCIS, JNU, New Delhi"}
+
+\begin{document}
+
+\maketitle
+
+\thispagestyle{empty}
+<>=
+# sets values for variables used later in the program to prevent the \Sexpr error on parsing with Sweave
+numSamples=''
+classDistString=''
+missingText=''
+numPredictors=''
+numPCAcomp=''
+pcaText=''
+nzvText=''
+corrText=''
+ppText=''
+varText=''
+splitText="Dummy Text"
+nirText="Dummy Text"
+# pctTrain is a variable that is initialised in Data splitting, and reused later in testPred
+pctTrain=0.8
+Smpling=''
+nzvText1=''
+classDistString1=''
+dwnsmpl=''
+upsmpl=''
+
+@
+<>=
+library(Hmisc)
+library(caret)
+library(pROC)
+versionTest <- compareVersion(packageDescription("caret")$$Version,
+ "4.65")
+if(versionTest < 0) stop("caret version 4.65 or later is required")
+
+library(RColorBrewer)
+
+
+listString <- function (x, period = FALSE, verbose = FALSE)
+{
+ if (verbose) cat("\n entering listString\n")
+ flush.console()
+ if (!is.character(x))
+ x <- as.character(x)
+ numElements <- length(x)
+ out <- if (length(x) > 0) {
+ switch(min(numElements, 3), x, paste(x, collapse = " and "),
+ {
+ x <- paste(x, c(rep(",", numElements - 2), " and", ""), sep = "")
+ paste(x, collapse = " ")
+ })
+ }
+ else ""
+ if (period) out <- paste(out, ".", sep = "")
+ if (verbose) cat(" leaving listString\n\n")
+ flush.console()
+ out
+}
+
+resampleStats <- function(x, digits = 3)
+ {
+ bestPerf <- x$$bestTune
+ colnames(bestPerf) <- gsub("^\\.", "", colnames(bestPerf))
+ out <- merge(x$$results, bestPerf)
+ out <- out[, colnames(out) %in% x$$perfNames]
+ names(out) <- gsub("ROC", "area under the ROC curve", names(out), fixed = TRUE)
+ names(out) <- gsub("Sens", "sensitivity", names(out), fixed = TRUE)
+ names(out) <- gsub("Spec", "specificity", names(out), fixed = TRUE)
+ names(out) <- gsub("Accuracy", "overall accuracy", names(out), fixed = TRUE)
+ names(out) <- gsub("Kappa", "Kappa statistics", names(out), fixed = TRUE)
+
+ out <- format(out, digits = digits)
+ listString(paste(names(out), "was", out))
+ }
+
+twoClassNoProbs <- function (data, lev = NULL, model = NULL)
+{
+ out <- c(sensitivity(data[, "pred"], data[, "obs"], lev[1]),
+ specificity(data[, "pred"], data[, "obs"], lev[2]),
+ confusionMatrix(data[, "pred"], data[, "obs"])$$overall["Kappa"])
+
+ names(out) <- c("Sens", "Spec", "Kappa")
+ out
+}
+
+
+
+##OPTION: model name: see ?train for more values/models
+modName <- "$METHOD"
+
+
+load("$RDATA")
+rawData <- dataX
+rawData$$outcome <- dataY
+
+@
+
+
+\section*{Data Sets}\label{S:data}
+
+%% OPTION: provide some background on the problem, the experimental
+%% data, how the compounds were selected etc
+
+<>=
+if(!any(names(rawData) == "outcome")) stop("a variable called outcome should be in the data set")
+if(!is.factor(rawData$$outcome)) stop("the outcome should be a factor vector")
+
+## OPTION: when there are only two classes, the first level of the
+## factor is used as the "positive" or "event" for calculating
+## sensitivity and specificity. Adjust the outcome factor accordingly.
+numClasses <- length(levels(rawData$$outcome))
+numSamples <- nrow(rawData)
+numPredictors <- ncol(rawData) - 1
+predictorNames <- names(rawData)[names(rawData) != "outcome"]
+
+isNum <- apply(rawData[,predictorNames, drop = FALSE], 2, is.numeric)
+if(any(!isNum)) stop("all predictors in rawData should be numeric")
+
+classTextCheck <- all.equal(levels(rawData$$outcome), make.names(levels(rawData$$outcome)))
+if(!classTextCheck) warning("the class levels are not valid R variable names; this may cause errors")
+
+## Get the class distribution
+classDist <- table(rawData$$outcome)
+classDistString <- paste("``",
+ names(classDist),
+ "'' ($$n$$=",
+ classDist,
+ ")",
+ sep = "")
+classDistString <- listString(classDistString)
+@
+
+<>=
+colRate <- apply(rawData[, predictorNames, drop = FALSE],
+ 2, function(x) mean(is.na(x)))
+
+##OPTION thresholds can be changed
+colExclude <- colRate > $MISSINGFILTERTHRESHC
+
+missingText <- ""
+
+if(any(colExclude))
+ {
+ missingText <- paste(missingText,
+ ifelse(sum(colExclude) > 1,
+ " There were ",
+ " There was "),
+ sum(colExclude),
+ ifelse(sum(colExclude) > 1,
+ " predictors ",
+ " predictor "),
+ "with an excessive number of ",
+ "missing data. ",
+ ifelse(sum(colExclude) > 1,
+ " These were excluded. ",
+ " This was excluded. "))
+ predictorNames <- predictorNames[!colExclude]
+ rawData <- rawData[, names(rawData) %in% c("outcome", predictorNames), drop = FALSE]
+ }
+
+
+rowRate <- apply(rawData[, predictorNames, drop = FALSE],
+ 1, function(x) mean(is.na(x)))
+
+rowExclude <- rowRate > $MISSINGFILTERTHRESHR
+
+
+if(any(rowExclude)) {
+ missingText <- paste(missingText,
+ ifelse(sum(rowExclude) > 1,
+ " There were ",
+ " There was "),
+ sum(colExclude),
+ ifelse(sum(rowExclude) > 1,
+ " samples ",
+ " sample "),
+ "with an excessive number of ",
+ "missing data. ",
+ ifelse(sum(rowExclude) > 1,
+ " These were excluded. ",
+ " This was excluded. "),
+ "After filtering, ",
+ sum(!rowExclude),
+ " samples remained.")
+ rawData <- rawData[!rowExclude, ]
+ hasMissing <- apply(rawData[, predictorNames, drop = FALSE],
+ 1, function(x) mean(is.na(x)))
+ } else {
+ hasMissing <- apply(rawData[, predictorNames, drop = FALSE],
+ 1, function(x) any(is.na(x)))
+ missingText <- paste(missingText,
+ ifelse(missingText == "",
+ "There ",
+ "Subsequently, there "),
+ ifelse(sum(hasMissing) == 1,
+ "was ",
+ "were "),
+ ifelse(sum(hasMissing) > 0,
+ sum(hasMissing),
+ "no"),
+ ifelse(sum(hasMissing) == 1,
+ "sample ",
+ "samples "),
+ "with missing values.")
+
+ rawData <- rawData[complete.cases(rawData),]
+
+ }
+
+rawData1 <- rawData[,1:length(rawData)-1]
+rawData2 <- rawData[,length(rawData)]
+
+set.seed(222)
+nzv1 <- nearZeroVar(rawData1)
+ if(length(nzv1) > 0)
+ {
+ nzvVars1 <- names(rawData1)[nzv1]
+ rawData <- rawData1[, -nzv1]
+ rawData$outcome <- rawData2
+ nzvText1 <- paste("There were ",
+ length(nzv1),
+ " predictors that were removed from original data due to",
+ " severely unbalanced distributions that",
+ " could negatively affect the model fit",
+ ifelse(length(nzv1) > 10,
+ ".",
+ paste(": ",
+ listString(nzvVars1),
+ ".",
+ sep = "")),
+ sep = "")
+
+ } else {
+rawData <- rawData1
+rawData$outcome <- rawData2
+nzvText1 <- ""
+
+}
+
+remove("rawData1")
+remove("rawData2")
+
+@
+
+ The initial data set consisted of \Sexpr{numSamples} samples and
+\Sexpr{numPredictors} predictor variables. The breakdown of the
+outcome data classes were: \Sexpr{classDistString}.
+
+ \Sexpr{missingText}
+
+ \Sexpr{nzvText1}
+
+<>=
+
+predictorNames <- names(rawData)[names(rawData) != "outcome"]
+numPredictors <- length(predictorNames)
+predictors <- rawData[, predictorNames, drop = FALSE]
+## PCA will fail with predictors having less than 2 unique values
+isZeroVar <- apply(predictors, 2,
+ function(x) length(unique(x)) < 2)
+if(any(isZeroVar)) predictors <- predictors[, !isZeroVar, drop = FALSE]
+## For whatever reason, only the formula interface to prcomp
+## handles missing values
+pcaForm <- as.formula(
+ paste("~",
+ paste(names(predictors), collapse = "+")))
+pca <- prcomp(pcaForm,
+ data = predictors,
+ center = TRUE,
+ scale. = TRUE,
+ na.action = na.omit)
+## OPTION: the number of components plotted/discussed can be set
+numPCAcomp <- $PCACOMP
+pctVar <- pca$$sdev^2/sum(pca$$sdev^2)*100
+pcaText <- paste(round(pctVar[1:numPCAcomp], 1),
+ "\\\\%",
+ sep = "")
+pcaText <- listString(pcaText)
+@
+
+ To get an initial assessment of the separability of the classes,
+ principal component analysis (PCA) was used to distill the
+ \Sexpr{numPredictors} predictors down into \Sexpr{numPCAcomp}
+ surrogate variables (i.e. the principal components) in a manner that
+ attempts to maximize the amount of information preserved from the
+ original predictor set. Figure \ref{F:inititalPCA} contains plots of
+ the first \Sexpr{numPCAcomp} components, which accounted for
+ \Sexpr{pcaText} percent of the variability in the original predictors
+ (respectively).
+
+
+%% OPTION: remark on how well (or poorly) the data separated
+
+ \setkeys{Gin}{width = 0.8\textwidth}
+ \begin{figure}[p]
+ \begin{center}
+
+<>=
+trellis.par.set(caretTheme(), warn = TRUE)
+if(numPCAcomp == 2)
+ {
+ axisRange <- extendrange(pca$$x[, 1:2])
+ print(
+ xyplot(PC1 ~ PC2,
+ data = as.data.frame(pca$$x),
+ type = c("p", "g"),
+ groups = rawData$$outcome,
+ auto.key = list(columns = 2),
+ xlim = axisRange,
+ ylim = axisRange))
+ } else {
+ axisRange <- extendrange(pca$$x[, 1:numPCAcomp])
+ print(
+ splom(~as.data.frame(pca$$x)[, 1:numPCAcomp],
+ type = c("p", "g"),
+ groups = rawData$$outcome,
+ auto.key = list(columns = 2),
+ as.table = TRUE,
+ prepanel.limits = function(x) axisRange
+ ))
+
+ }
+
+@
+
+ \caption[PCA Plot]{A plot of the first \Sexpr{numPCAcomp}
+ principal components for the original data set.}
+ \label{F:inititalPCA}
+ \end{center}
+ \end{figure}
+
+
+
+<>=
+
+ ## OPTION: in small samples sizes, you may not want to set aside a
+ ## training set and focus on the resampling results.
+
+set.seed(1234)
+dataX <- rawData[,1:length(rawData)-1]
+dataY <- rawData[,length(rawData)]
+
+ Smpling <- "$SAAMPLING"
+
+if(Smpling=="downsampling")
+{
+dwnsmpl <- downSample(dataX,dataY)
+rawData <- dwnsmpl[,1:length(dwnsmpl)-1]
+rawData$outcome <- dwnsmpl[,length(dwnsmpl)]
+remove("dwnsmpl")
+remove("dataX")
+remove("dataY")
+}else if(Smpling=="upsampling"){
+upsmpl <- upSample(dataX,dataY)
+rawData <- upsmpl[,1:length(upsmpl)-1]
+rawData$outcome <- upsmpl[,length(upsmpl)]
+remove("upsmpl")
+remove("dataX")
+remove("dataY")
+}else{remove("dataX")
+remove("dataY")
+}
+
+
+
+numSamples <- nrow(rawData)
+
+predictorNames <- names(rawData)[names(rawData) != "outcome"]
+numPredictors <- length(predictorNames)
+
+
+classDist1 <- table(rawData$outcome)
+classDistString1 <- paste("``",
+ names(classDist1),
+ "'' ($n$=",
+ classDist1,
+ ")",
+ sep = "")
+classDistString1 <- listString(classDistString1)
+
+ pctTrain <- $PERCENT
+
+if(pctTrain < 1)
+ {
+ ## OPTION: seed number can be changed
+ set.seed(1)
+ inTrain <- createDataPartition(rawData$$outcome,
+ p = pctTrain,
+ list = FALSE)
+ trainX <- rawData[ inTrain, predictorNames]
+ testX <- rawData[-inTrain, predictorNames]
+ trainY <- rawData[ inTrain, "outcome"]
+ testY <- rawData[-inTrain, "outcome"]
+ splitText <- paste("The original data were split into ",
+ "a training set ($$n$$=",
+ nrow(trainX),
+ ") and a test set ($$n$$=",
+ nrow(testX),
+ ") in a manner that preserved the ",
+ "distribution of the classes.",
+ sep = "")
+ isZeroVar <- apply(trainX, 2,
+ function(x) length(unique(x)) < 2)
+ if(any(isZeroVar))
+ {
+ trainX <- trainX[, !isZeroVar, drop = FALSE]
+ testX <- testX[, !isZeroVar, drop = FALSE]
+ }
+
+ } else {
+ trainX <- rawData[, predictorNames]
+ testX <- NULL
+ trainY <- rawData[, "outcome"]
+ testY <- NULL
+ splitText <- "The entire data set was used as the training set."
+ }
+trainDist <- table(trainY)
+nir <- max(trainDist)/length(trainY)*100
+niClass <- names(trainDist)[which.max(trainDist)]
+nirText <- paste("The non--information rate is the accuracy that can be ",
+ "achieved by predicting all samples using the most ",
+ "dominant class. For these data, the rate is ",
+ round(nir, 2), "\\\\% using the ``",
+ niClass,
+ "'' class.",
+ sep = "")
+
+remove("rawData")
+
+if((!is.null(testX)) && (!is.null(testY))){
+save(trainX,trainY,testX,testY,file="datasets.RData")
+} else {
+save(trainX,trainY,file="datasets.RData")
+}
+
+@
+
+ \Sexpr{splitText}
+
+ \Sexpr{nirText}
+
+The data set for model building consisted of \Sexpr{numSamples} samples and
+\Sexpr{numPredictors} predictor variables. The breakdown of the
+outcome data classes were: \Sexpr{classDistString1}.
+
+<>=
+## OPTION: other pre-processing steps can be used
+ppSteps <- caret:::suggestions(modName)
+
+set.seed(2)
+if(ppSteps["nzv"])
+ {
+ nzv <- nearZeroVar(trainX)
+ if(length(nzv) > 0)
+ {
+ nzvVars <- names(trainX)[nzv]
+ trainX <- trainX[, -nzv]
+ nzvText <- paste("There were ",
+ length(nzv),
+ " predictors that were removed from train set due to",
+ " severely unbalanced distributions that",
+ " could negatively affect the model",
+ ifelse(length(nzv) > 10,
+ ".",
+ paste(": ",
+ listString(nzvVars),
+ ".",
+ sep = "")),
+ sep = "")
+ testX <- testX[, -nzv]
+ } else nzvText <- ""
+ } else nzvText <- ""
+@
+
+\Sexpr{nzvText}
+
+
+<>=
+if(ppSteps["corr"])
+ {
+ ## OPTION:
+ corrThresh <- $THRESHHOLDCOR
+ highCorr <- findCorrelation(cor(trainX, use = "pairwise.complete.obs"),
+ corrThresh)
+ if(length(highCorr) > 0)
+ {
+ corrVars <- names(trainX)[highCorr]
+ trainX <- trainX[, -highCorr]
+ corrText <- paste("There were ",
+ length(highCorr),
+ " predictors that were removed due to",
+ " large between--predictor correlations that",
+ " could negatively affect the model fit",
+ ifelse(length(highCorr) > 10,
+ ".",
+ paste(": ",
+ listString(highCorr),
+ ".",
+ sep = "")),
+ " Removing these predictors forced",
+ " all pair--wise correlations to be",
+ " less than ",
+ corrThresh,
+ ".",
+ sep = "")
+ testX <- testX[, -highCorr]
+ } else corrText <- "No correlation among data on given threshold"
+ }else corrText <- ""
+@
+
+ \Sexpr{corrText}
+
+<>=
+ppMethods <- NULL
+if(ppSteps["center"]) ppMethods <- c(ppMethods, "center")
+if(ppSteps["scale"]) ppMethods <- c(ppMethods, "scale")
+if(any(hasMissing) > 0) ppMethods <- c(ppMethods, "knnImpute")
+##OPTION other methods, such as spatial sign, can be added to this list
+
+if(length(ppMethods) > 0)
+ {
+ ppInfo <- preProcess(trainX, method = ppMethods)
+ trainX <- predict(ppInfo, trainX)
+ if(pctTrain < 1) testX <- predict(ppInfo, testX)
+ ppText <- paste("The following pre--processing methods were",
+ " applied to the training",
+ ifelse(pctTrain < 1, " and test", ""),
+ " data: ",
+ listString(ppMethods),
+ ".",
+ sep = "")
+ ppText <- gsub("center", "mean centering", ppText)
+ ppText <- gsub("scale", "scaling to unit variance", ppText)
+ ppText <- gsub("knnImpute",
+ paste(ppInfo$$k, "--nearest neighbor imputation", sep = ""),
+ ppText)
+ ppText <- gsub("spatialSign", "the spatial sign transformation", ppText)
+ ppText <- gsub("pca", "principal component feature extraction", ppText)
+ ppText <- gsub("ica", "independent component feature extraction", ppText)
+ } else {
+ ppInfo <- NULL
+ ppText <- ""
+ }
+
+predictorNames <- names(trainX)
+if(nzvText != "" | corrText != "" | ppText != "")
+ {
+ varText <- paste("After pre--processing, ",
+ ncol(trainX),
+ "predictors remained for modeling.")
+ } else varText <- ""
+
+@
+
+ \Sexpr{ppText}
+ \Sexpr{varText}
+
+\clearpage
+
+\section*{Model Building}
+
+<>=
+numWorkers <- $NUMWORKERS
+##OPTION: turn up numWorkers to use MPI
+if(numWorkers > 1)
+ {
+ mpiCalcs <- function(X, FUN, ...)
+ {
+ theDots <- list(...)
+ parLapply(theDots$$cl, X, FUN)
+ }
+
+ library(snow)
+ cl <- makeCluster(numWorkers, "MPI")
+ }
+@
+
+<>=
+##OPTION: the resampling options can be changed. See
+## ?trainControl for details
+
+resampName <- "$RESAMPNAME"
+resampNumber <- $RESAMPLENUMBER
+numRepeat <- $NUMREPEAT
+resampP <- $RESAMPLENUMBERPERCENT
+
+modelInfo <- modelLookup(modName)
+
+if(numClasses == 2)
+ {
+ foo <- if(any(modelInfo$$probModel)) twoClassSummary else twoClassNoProbs
+ } else foo <- defaultSummary
+
+set.seed(3)
+ctlObj <- trainControl(method = resampName,
+ number = resampNumber,
+ repeats = numRepeat,
+ p = resampP,
+ classProbs = any(modelInfo$$probModel),
+ summaryFunction = foo)
+
+
+##OPTION select other performance metrics as needed
+optMetric <- if(numClasses == 2 & any(modelInfo$$probModel)) "ROC" else "Kappa"
+
+if(numWorkers > 1)
+ {
+ ctlObj$$workers <- numWorkers
+ ctlObj$$computeFunction <- mpiCalcs
+ ctlObj$$computeArgs <- list(cl = cl)
+ }
+@
+
+<>=
+##OPTION expand or contract these grids as needed (or
+## add more models
+
+gridSize <- $SETUPGRIDSIZE
+
+if(modName %in% c("svmPoly", "svmRadial", "svmLinear", "lvq", "ctree2", "ctree")) gridSize <- 5
+if(modName %in% c("earth", "fda")) gridSize <- 7
+if(modName %in% c("knn", "rocc", "glmboost", "rf", "nodeHarvest")) gridSize <- 10
+
+if(modName %in% c("nb")) gridSize <- 2
+if(modName %in% c("pam", "rpart")) gridSize <- 15
+if(modName %in% c("pls")) gridSize <- min(20, ncol(trainX))
+
+if(modName == "gbm")
+ {
+ tGrid <- expand.grid(.interaction.depth = -1 + (1:5)*2 ,
+ .n.trees = (1:10)*20,
+ .shrinkage = .1)
+ }
+
+if(modName == "nnet")
+ {
+ tGrid <- expand.grid(.size = -1 + (1:5)*2 ,
+ .decay = c(0, .001, .01, .1))
+ }
+
+if(modName == "ada")
+ {
+ tGrid <- expand.grid(.maxdepth = 1, .iter = c(100,200,300,400), .nu = 1 )
+
+ }
+
+
+@
+
+<>=
+##OPTION alter as needed
+
+set.seed(4)
+modelFit <- switch(modName,
+ gbm =
+ {
+ mix <- sample(seq(along = trainY))
+ train(
+ trainX[mix,], trainY[mix], modName,
+ verbose = FALSE,
+ bag.fraction = .9,
+ metric = optMetric,
+ trControl = ctlObj,
+ tuneGrid = tGrid)
+ },
+
+ multinom =
+ {
+ train(
+ trainX, trainY, modName,
+ trace = FALSE,
+ metric = optMetric,
+ maxiter = 1000,
+ MaxNWts = 5000,
+ trControl = ctlObj,
+ tuneLength = gridSize)
+ },
+
+ nnet =
+ {
+ train(
+ trainX, trainY, modName,
+ metric = optMetric,
+ linout = FALSE,
+ trace = FALSE,
+ maxiter = 1000,
+ MaxNWts = 5000,
+ trControl = ctlObj,
+ tuneGrid = tGrid)
+
+ },
+
+ svmRadial =, svmPoly =, svmLinear =
+ {
+ train(
+ trainX, trainY, modName,
+ metric = optMetric,
+ scaled = TRUE,
+ trControl = ctlObj,
+ tuneLength = gridSize)
+ },
+ {
+ train(trainX, trainY, modName,
+ trControl = ctlObj,
+ metric = optMetric,
+ tuneLength = gridSize)
+ })
+
+@
+
+<>=
+summaryText <- ""
+
+resampleName <- switch(tolower(modelFit$$control$$method),
+ boot = paste("the bootstrap (", length(modelFit$$control$$index), " reps)", sep = ""),
+ boot632 = paste("the bootstrap 632 rule (", length(modelFit$$control$$index), " reps)", sep = ""),
+ cv = paste("cross-validation (", modelFit$$control$$number, " fold)", sep = ""),
+ repeatedcv = paste("cross-validation (", modelFit$$control$$number, " fold, repeated ",
+ modelFit$$control$$repeats, " times)", sep = ""),
+ lgocv = paste("repeated train/test splits (", length(modelFit$$control$$index), " reps, ",
+ round(modelFit$$control$$p, 2), "$$\\%$$)", sep = ""))
+
+tuneVars <- latexTranslate(tolower(modelInfo$$label))
+tuneVars <- gsub("\\#", "the number of ", tuneVars, fixed = TRUE)
+if(ncol(modelFit$$bestTune) == 1 && colnames(modelFit$$bestTune) == ".parameter")
+ {
+ summaryText <- paste(summaryText,
+ "\n\n",
+ "There are no tuning parameters associated with this model.",
+ "To characterize the model performance on the training set,",
+ resampleName,
+ "was used.",
+ "Table \\\\ref{T:resamps} and Figure \\\\ref{F:profile}",
+ "show summaries of the resampling results. ")
+
+ } else {
+ summaryText <- paste("There",
+ ifelse(nrow(modelInfo) > 1, "are", "is"),
+ nrow(modelInfo),
+ ifelse(nrow(modelInfo) > 1, "tuning parameters", "tuning parameter"),
+ "associated with this model:",
+ listString(tuneVars, period = TRUE))
+
+
+
+ paramNames <- gsub(".", "", names(modelFit$$bestTune), fixed = TRUE)
+ for(i in seq(along = paramNames))
+ {
+ check <- modelInfo$$parameter %in% paramNames[i]
+ if(any(check))
+ {
+ paramNames[i] <- modelInfo$$label[which(check)]
+ }
+ }
+
+ paramNames <- gsub("#", "the number of ", paramNames, fixed = TRUE)
+ ## Check to see if there was only one combination fit
+ summaryText <- paste(summaryText,
+ "To choose",
+ ifelse(nrow(modelInfo) > 1,
+ "appropriate values of the tuning parameters,",
+ "an appropriate value of the tuning parameter,"),
+ resampleName,
+ "was used to generated a profile of performance across the",
+ nrow(modelFit$$results),
+ ifelse(nrow(modelInfo) > 1,
+ "combinations of the tuning parameters.",
+ "candidate values."),
+
+ "Table \\\\ref{T:resamps} and Figure \\\\ref{F:profile} show",
+ "summaries of the resampling profile. ", "The final model fitted to the entire training set was:",
+ listString(paste(latexTranslate(tolower(paramNames)), "=", modelFit$$bestTune[1,]), period = TRUE))
+
+ }
+@
+
+\Sexpr{summaryText}
+
+<>=
+tableData <- modelFit$$results
+
+if(all(modelInfo$$parameter == "parameter") && resampName == "boot632")
+ {
+ tableData <- tableData[,-1, drop = FALSE]
+ colNums <- c( length(modelFit$$perfNames), length(modelFit$$perfNames), length(modelFit$$perfNames))
+ colLabels <- c("Mean", "Standard Deviation","Apparant")
+ constString <- ""
+ isConst <- NULL
+ } else if (all(modelInfo$$parameter == "parameter") && (resampName == "boot" | resampName == "cv" | resampName == "repeatedcv" )){
+ tableData <- tableData[,-1, drop = FALSE]
+ colNums <- c(length(modelFit$$perfNames), length(modelFit$$perfNames))
+ colLabels <- c("Mean", "Standard Deviation")
+ constString <- ""
+ isConst <- NULL
+ } else if (all(modelInfo$$parameter == "parameter") && resampName == "LOOCV" ){
+ tableData <- tableData[,-1, drop = FALSE]
+ colNums <- length(modelFit$$perfNames)
+ colLabels <- c("Measures")
+ constString <- ""
+ isConst <- NULL
+} else {
+ if (all(modelInfo$$parameter != "parameter") && resampName == "boot632" ){
+ isConst <- apply(tableData[, modelInfo$$parameter, drop = FALSE],
+ 2,
+ function(x) length(unique(x)) == 1)
+
+ numParamInTable <- sum(!isConst)
+
+ if(any(isConst))
+ {
+ constParam <- modelInfo$$parameter[isConst]
+ constValues <- format(tableData[, constParam, drop = FALSE], digits = 4)[1,,drop = FALSE]
+ tableData <- tableData[, !(names(tableData) %in% constParam), drop = FALSE]
+ constString <- paste("The tuning",
+ ifelse(sum(isConst) > 1,
+ "parmeters",
+ "parameter"),
+ listString(paste("``", names(constValues), "''", sep = "")),
+ ifelse(sum(isConst) > 1,
+ "were",
+ "was"),
+ "held constant at",
+ ifelse(sum(isConst) > 1,
+ "a value of",
+ "values of"),
+ listString(constValues[1,]))
+
+ } else constString <- ""
+
+ cn <- colnames(tableData)
+ for(i in seq(along = cn))
+ {
+ check <- modelInfo$$parameter %in% cn[i]
+ if(any(check))
+ {
+ cn[i] <- modelInfo$$label[which(check)]
+ }
+ }
+ colnames(tableData) <- cn
+
+ colNums <- c(numParamInTable,
+ length(modelFit$$perfNames),
+ length(modelFit$$perfNames),
+ length(modelFit$$perfNames))
+ colLabels <- c("", "Mean", "Standard Deviation", "Apparant")
+
+}else if (all(modelInfo$$parameter != "parameter") && (resampName == "boot" | resampName == "repeatedcv" | resampName == "cv") ){
+ isConst <- apply(tableData[, modelInfo$$parameter, drop = FALSE],
+ 2,
+ function(x) length(unique(x)) == 1)
+
+ numParamInTable <- sum(!isConst)
+
+ if(any(isConst))
+ {
+ constParam <- modelInfo$$parameter[isConst]
+ constValues <- format(tableData[, constParam, drop = FALSE], digits = 4)[1,,drop = FALSE]
+ tableData <- tableData[, !(names(tableData) %in% constParam), drop = FALSE]
+ constString <- paste("The tuning",
+ ifelse(sum(isConst) > 1,
+ "parmeters",
+ "parameter"),
+ listString(paste("``", names(constValues), "''", sep = "")),
+ ifelse(sum(isConst) > 1,
+ "were",
+ "was"),
+ "held constant at",
+ ifelse(sum(isConst) > 1,
+ "a value of",
+ "values of"),
+ listString(constValues[1,]))
+
+ } else constString <- ""
+
+ cn <- colnames(tableData)
+ for(i in seq(along = cn))
+ {
+ check <- modelInfo$$parameter %in% cn[i]
+ if(any(check))
+ {
+ cn[i] <- modelInfo$$label[which(check)]
+ }
+ }
+ colnames(tableData) <- cn
+
+ colNums <- c(numParamInTable,
+ length(modelFit$$perfNames),
+ length(modelFit$$perfNames))
+ colLabels <- c("", "Mean", "Standard Deviation")
+
+}
+else if (all(modelInfo$$parameter != "parameter") && resampName == "LOOCV"){
+ isConst <- apply(tableData[, modelInfo$$parameter, drop = FALSE],
+ 2,
+ function(x) length(unique(x)) == 1)
+
+ numParamInTable <- sum(!isConst)
+
+ if(any(isConst))
+ {
+ constParam <- modelInfo$$parameter[isConst]
+ constValues <- format(tableData[, constParam, drop = FALSE], digits = 4)[1,,drop = FALSE]
+ tableData <- tableData[, !(names(tableData) %in% constParam), drop = FALSE]
+ constString <- paste("The tuning",
+ ifelse(sum(isConst) > 1,
+ "parmeters",
+ "parameter"),
+ listString(paste("``", names(constValues), "''", sep = "")),
+ ifelse(sum(isConst) > 1,
+ "were",
+ "was"),
+ "held constant at",
+ ifelse(sum(isConst) > 1,
+ "a value of",
+ "values of"),
+ listString(constValues[1,]))
+
+ } else constString <- ""
+
+ cn <- colnames(tableData)
+ for(i in seq(along = cn))
+ {
+ check <- modelInfo$$parameter %in% cn[i]
+ if(any(check))
+ {
+ cn[i] <- modelInfo$$label[which(check)]
+ }
+ }
+ colnames(tableData) <- cn
+
+ colNums <- c(numParamInTable,
+ length(modelFit$$perfNames))
+ colLabels <- c("", "Measures")
+
+}
+
+}
+
+
+
+colnames(tableData) <- gsub("SD$$", "", colnames(tableData))
+colnames(tableData) <- gsub("Apparent$$", "", colnames(tableData))
+colnames(tableData) <- latexTranslate(colnames(tableData))
+rownames(tableData) <- latexTranslate(rownames(tableData))
+
+latex(tableData,
+ rowname = NULL,
+ file = "",
+ cgroup = colLabels,
+ n.cgroup = colNums,
+ where = "h!",
+ digits = 4,
+ longtable = nrow(tableData) > 30,
+ caption = paste(resampleName, "results from the model fit.", constString),
+ label = "T:resamps")
+@
+
+ \setkeys{Gin}{ width = 0.9\textwidth}
+ \begin{figure}[b]
+ \begin{center}
+
+<>=
+ trellis.par.set(caretTheme(), warn = TRUE)
+if(all(modelInfo$$parameter == "parameter") | all(isConst) | modName == "nb")
+ {
+ resultsPlot <- resampleHist(modelFit)
+ plotCaption <- paste("Distributions of model performance from the ",
+ "training set estimated using ",
+ resampleName)
+ } else {
+ if(modName %in% c("svmPoly", "svmRadial", "svmLinear"))
+ {
+ resultsPlot <- plot(modelFit,
+ metric = optMetric,
+ xTrans = function(x) log10(x))
+ resultsPlot <- update(resultsPlot,
+ type = c("g", "p", "l"),
+ ylab = paste(optMetric, " (", resampleName, ")", sep = ""))
+
+ } else {
+ resultsPlot <- plot(modelFit,
+ metric = optMetric)
+ resultsPlot <- update(resultsPlot,
+ type = c("g", "p", "l"),
+ ylab = paste(optMetric, " (", resampleName, ")", sep = ""))
+ }
+ plotCaption <- paste("A plot of the estimates of the",
+ optMetric,
+ "values calculated using",
+ resampleName)
+ }
+print(resultsPlot)
+@
+ \caption[Performance Plot]{\Sexpr{plotCaption}.}
+ \label{F:profile}
+ \end{center}
+ \end{figure}
+
+
+<>=
+if(numWorkers > 1) stopCluster(cl)
+@
+
+<>=
+ if(pctTrain < 1)
+ {
+ cat("\\clearpage\n\\section*{Test Set Results}\n\n")
+ classPred <- predict(modelFit, testX)
+ cm <- confusionMatrix(classPred, testY)
+ values <- cm$$overall[c("Accuracy", "Kappa", "AccuracyPValue", "McnemarPValue")]
+
+ values <- values[!is.na(values) & !is.nan(values)]
+ values <- c(format(values[1:2], digits = 3),
+ format.pval(values[-(1:2)], digits = 5))
+ nms <- c("the overall accuracy", "the Kappa statistic",
+ "the $$p$$--value that accuracy is greater than the no--information rate",
+ "the $$p$$--value of concordance from McNemar's test")
+ nms <- nms[seq(along = values)]
+ names(values) <- nms
+
+ if(any(modelInfo$$probModel))
+ {
+ classProbs <- extractProb(list(fit = modelFit),
+ testX = testX,
+ testY = testY)
+ classProbs <- subset(classProbs, dataType == "Test")
+ if(numClasses == 2)
+ {
+ tmp <- twoClassSummary(classProbs, lev = levels(classProbs$$obs))
+ tmp <- c(format(tmp, digits = 3))
+ names(tmp) <- c("the sensitivity", "the specificity",
+ "the area under the ROC curve")
+ values <- c(values, tmp)
+
+ }
+ probPlot <- plotClassProbs(classProbs)
+ }
+ testString <- paste("Based on the test set of",
+ nrow(testX),
+ "samples,",
+ listString(paste(names(values), "was", values), period = TRUE),
+ "The confusion matrix for the test set is shown in Table",
+ "\\\\ref{T:cm}.")
+ testString <- paste(testString,
+ " Using ", resampleName,
+ ", the training set estimates were ",
+ resampleStats(modelFit),
+ ".",
+ sep = "")
+
+ if(any(modelInfo$$probModel)) testString <- paste(testString,
+ "Histograms of the class probabilities",
+ "for the test set samples are shown in",
+ "Figure \\\\ref{F:probs}",
+ ifelse(numClasses == 2,
+ " and the test set ROC curve is in Figure \\\\ref{F:roc}.",
+ "."))
+
+
+
+ latex(cm$$table,
+ title = "",
+ file = "",
+ where = "h",
+ cgroup = "Observed Values",
+ n.cgroup = numClasses,
+ caption = "The confusion matrix for the test set",
+ label = "T:cm")
+
+ } else testString <- ""
+@
+\Sexpr{testString}
+
+
+<>=
+ if(any(modelInfo$probModel) && pctTrain < 1 ) {
+ cat(
+ paste("\\begin{figure}[p]\n",
+ "\\begin{center}\n",
+ "\\includegraphics{classProbs}",
+ "\\caption[PCA Plot]{Class probabilities",
+ "for the test set. Each panel contains ",
+ "separate classes}\n",
+ "\\label{F:probs}\n",
+ "\\end{center}\n",
+ "\\end{figure}"))
+ }
+ if(any(modelInfo$$probModel) & numClasses == 2 & pctTrain < 1 )
+ {
+ cat(
+ paste("\\begin{figure}[p]\n",
+ "\\begin{center}\n",
+ "\\includegraphics[clip, width = .8\\textwidth]{roc}",
+ "\\caption[ROC Plot]{ROC Curve",
+ "for the test set.}\n",
+ "\\label{F:roc}\n",
+ "\\end{center}\n",
+ "\\end{figure}"))
+ } else {
+cat (paste(""))
+}
+
+@
+<>=
+ if(any(modelInfo$probModel) && pctTrain < 1) {
+ pdf("classProbs.pdf", height = 7, width = 7)
+ trellis.par.set(caretTheme(), warn = FALSE)
+ print(probPlot)
+ dev.off()
+ }
+ if(any(modelInfo$probModel) & numClasses == 2 & pctTrain < 1) {
+ resPonse<-testY
+ preDictor<-classProbs[, levels(trainY)[1]]
+ pdf("roc.pdf", height = 8, width = 8)
+# from pROC example at http://web.expasy.org/pROC/screenshots.htm
+ plot.roc(resPonse, preDictor, # data
+ percent=TRUE, # show all values in percent
+ partial.auc=c(100, 90), partial.auc.correct=TRUE, # define a partial AUC (pAUC)
+ print.auc=TRUE, #display pAUC value on the plot with following options:
+ print.auc.pattern="Corrected pAUC (100-90%% SP):\n%.1f%%", print.auc.col="#1c61b6",
+ auc.polygon=TRUE, auc.polygon.col="#1c61b6", # show pAUC as a polygon
+ max.auc.polygon=TRUE, max.auc.polygon.col="#1c61b622", # also show the 100% polygon
+ main="Partial AUC (pAUC)")
+ plot.roc(resPonse, preDictor,
+ percent=TRUE, add=TRUE, type="n", # add to plot, but don't re-add the ROC itself (useless)
+ partial.auc=c(100, 90), partial.auc.correct=TRUE,
+ partial.auc.focus="se", # focus pAUC on the sensitivity
+ print.auc=TRUE, print.auc.pattern="Corrected pAUC (100-90%% SE):\n%.1f%%", print.auc.col="#008600",
+ print.auc.y=40, # do not print auc over the previous one
+ auc.polygon=TRUE, auc.polygon.col="#008600",
+ max.auc.polygon=TRUE, max.auc.polygon.col="#00860022")
+ dev.off()
+ } else {
+cat("")
+ }
+
+@
+
+\section*{Versions}
+
+<>=
+toLatex(sessionInfo())
+
+@
+
+<>=
+## change this to the name of modName....
+Fit<-modelFit
+save(Fit,file="$METHOD-Fit.RData")
+@
+The model was built using $METHOD and is saved as the $METHOD model for reuse. The saved file contains the variable Fit.
+
+\end{document}'''
+
+ return template4Rnw
diff -r 000000000000 -r ee374e48024f tool2/templateLibrary.py.orig
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool2/templateLibrary.py.orig Thu Jan 21 00:34:58 2016 -0500
@@ -0,0 +1,1248 @@
+def __template4Rnw():
+
+ template4Rnw = r'''%% Classification Modeling Script
+%% Max Kuhn (max.kuhn@pfizer.com, mxkuhn@gmail.com)
+%% Version: 1.00
+%% Created on: 2010/10/02
+%%
+%% This is an Sweave template for building and describing
+%% classification models. It mixes R and LaTeX code. The document can
+%% be processing using R's Sweave function to produce a tex file.
+%%
+%% The inputs are:
+%% - the initial data set in a data frame called 'rawData'
+%% - a factor column in the data set called 'class'. this should be the
+%% outcome variable
+%% - all other columns in rawData should be predictor variables
+%% - the type of model should be in a variable called 'modName'.
+%%
+%% The script attempts to make some intelligent choices based on the
+%% model being used. For example, if modName is "pls", the script will
+%% automatically center and scale the predictor data. There are
+%% situations where these choices can (and should be) changed.
+%%
+%% There are other options that may make sense to change. For example,
+%% the user may want to adjust the type of resampling. To find these
+%% parts of the script, search on the string 'OPTION'. These parts of
+%% the code will document the options.
+
+\documentclass[14pt]{report}
+\usepackage{amsmath}
+\usepackage[pdftex]{graphicx}
+\usepackage{color}
+\usepackage{ctable}
+\usepackage{xspace}
+\usepackage{fancyvrb}
+\usepackage{fancyhdr}
+\usepackage{lastpage}
+\usepackage{longtable}
+\usepackage{algorithm2e}
+\usepackage[
+ colorlinks=true,
+ linkcolor=blue,
+ citecolor=blue,
+ urlcolor=blue]
+ {hyperref}
+\usepackage{lscape}
+\usepackage{Sweave}
+\SweaveOpts{keep.source = TRUE}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+% define new colors for use
+\definecolor{darkgreen}{rgb}{0,0.6,0}
+\definecolor{darkred}{rgb}{0.6,0.0,0}
+\definecolor{lightbrown}{rgb}{1,0.9,0.8}
+\definecolor{brown}{rgb}{0.6,0.3,0.3}
+\definecolor{darkblue}{rgb}{0,0,0.8}
+\definecolor{darkmagenta}{rgb}{0.5,0,0.5}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\newcommand{\bld}[1]{\mbox{\boldmath $$#1$$}}
+\newcommand{\shell}[1]{\mbox{$$#1$$}}
+\renewcommand{\vec}[1]{\mbox{\bf {#1}}}
+
+\newcommand{\ReallySmallSpacing}{\renewcommand{\baselinestretch}{.6}\Large\normalsize}
+\newcommand{\SmallSpacing}{\renewcommand{\baselinestretch}{1.1}\Large\normalsize}
+
+\newcommand{\halfs}{\frac{1}{2}}
+
+\setlength{\oddsidemargin}{-.25 truein}
+\setlength{\evensidemargin}{0truein}
+\setlength{\topmargin}{-0.2truein}
+\setlength{\textwidth}{7 truein}
+\setlength{\textheight}{8.5 truein}
+\setlength{\parindent}{0.20truein}
+\setlength{\parskip}{0.10truein}
+
+\setcounter{LTchunksize}{50}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\pagestyle{fancy}
+\lhead{}
+%% OPTION Report header name
+\chead{Classification Model Script}
+\rhead{}
+\lfoot{}
+\cfoot{}
+\rfoot{\thepage\ of \pageref{LastPage}}
+\renewcommand{\headrulewidth}{1pt}
+\renewcommand{\footrulewidth}{1pt}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% OPTION Report title and modeler name
+\title{Classification Model Script using $METHOD}
+\author{"Lynn Group with M. Kuhn, SCIS, JNU, New Delhi"}
+
+\begin{document}
+
+\maketitle
+
+\thispagestyle{empty}
+<>=
+# sets values for variables used later in the program to prevent the \Sexpr error on parsing with Sweave
+numSamples=''
+classDistString=''
+missingText=''
+numPredictors=''
+numPCAcomp=''
+pcaText=''
+nzvText=''
+corrText=''
+ppText=''
+varText=''
+splitText="Dummy Text"
+nirText="Dummy Text"
+# pctTrain is a variable that is initialised in Data splitting, and reused later in testPred
+pctTrain=0.8
+Smpling=''
+nzvText1=''
+classDistString1=''
+dwnsmpl=''
+upsmpl=''
+
+@
+<>=
+library(Hmisc)
+library(caret)
+library(pROC)
+versionTest <- compareVersion(packageDescription("caret")$$Version,
+ "4.65")
+if(versionTest < 0) stop("caret version 4.65 or later is required")
+
+library(RColorBrewer)
+
+
+listString <- function (x, period = FALSE, verbose = FALSE)
+{
+ if (verbose) cat("\n entering listString\n")
+ flush.console()
+ if (!is.character(x))
+ x <- as.character(x)
+ numElements <- length(x)
+ out <- if (length(x) > 0) {
+ switch(min(numElements, 3), x, paste(x, collapse = " and "),
+ {
+ x <- paste(x, c(rep(",", numElements - 2), " and", ""), sep = "")
+ paste(x, collapse = " ")
+ })
+ }
+ else ""
+ if (period) out <- paste(out, ".", sep = "")
+ if (verbose) cat(" leaving listString\n\n")
+ flush.console()
+ out
+}
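+## e.g. listString(c("a", "b", "c")) should give "a, b and c" and
+## listString("a") gives "a"; it is used to splice vectors into the report prose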
+
+resampleStats <- function(x, digits = 3)
+ {
+ bestPerf <- x$$bestTune
+ colnames(bestPerf) <- gsub("^\\.", "", colnames(bestPerf))
+ out <- merge(x$$results, bestPerf)
+ out <- out[, colnames(out) %in% x$$perfNames]
+ names(out) <- gsub("ROC", "area under the ROC curve", names(out), fixed = TRUE)
+ names(out) <- gsub("Sens", "sensitivity", names(out), fixed = TRUE)
+ names(out) <- gsub("Spec", "specificity", names(out), fixed = TRUE)
+ names(out) <- gsub("Accuracy", "overall accuracy", names(out), fixed = TRUE)
+ names(out) <- gsub("Kappa", "Kappa statistics", names(out), fixed = TRUE)
+
+ out <- format(out, digits = digits)
+ listString(paste(names(out), "was", out))
+ }
+
+twoClassNoProbs <- function (data, lev = NULL, model = NULL)
+{
+ out <- c(sensitivity(data[, "pred"], data[, "obs"], lev[1]),
+ specificity(data[, "pred"], data[, "obs"], lev[2]),
+ confusionMatrix(data[, "pred"], data[, "obs"])$$overall["Kappa"])
+
+ names(out) <- c("Sens", "Spec", "Kappa")
+ out
+}
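+## twoClassNoProbs is the resampling summary used for two-class models that
+## cannot produce class probabilities: it reports sensitivity, specificity and
+## the Kappa statistic instead of the ROC-based twoClassSummary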
+
+
+
+##OPTION: model name: see ?train for more values/models
+modName <- "$METHOD"
+
+
+load("$RDATA")
+rawData <- dataX
+rawData$$outcome <- dataY
+
+@
+
+
+\section*{Data Sets}\label{S:data}
+
+%% OPTION: provide some background on the problem, the experimental
+%% data, how the compounds were selected etc
+
+<>=
+if(!any(names(rawData) == "outcome")) stop("a variable called outcome should be in the data set")
+if(!is.factor(rawData$$outcome)) stop("the outcome should be a factor vector")
+
+## OPTION: when there are only two classes, the first level of the
+## factor is used as the "positive" or "event" for calculating
+## sensitivity and specificity. Adjust the outcome factor accordingly.
+numClasses <- length(levels(rawData$$outcome))
+numSamples <- nrow(rawData)
+numPredictors <- ncol(rawData) - 1
+predictorNames <- names(rawData)[names(rawData) != "outcome"]
+
+isNum <- apply(rawData[,predictorNames, drop = FALSE], 2, is.numeric)
+if(any(!isNum)) stop("all predictors in rawData should be numeric")
+
+classTextCheck <- all.equal(levels(rawData$$outcome), make.names(levels(rawData$$outcome)))
+if(!classTextCheck) warning("the class levels are not valid R variable names; this may cause errors")
+
+## Get the class distribution
+classDist <- table(rawData$$outcome)
+classDistString <- paste("``",
+ names(classDist),
+ "'' ($$n$$=",
+ classDist,
+ ")",
+ sep = "")
+classDistString <- listString(classDistString)
+@
+
+<>=
+colRate <- apply(rawData[, predictorNames, drop = FALSE],
+ 2, function(x) mean(is.na(x)))
+
+##OPTION thresholds can be changed
+colExclude <- colRate > $MISSINGFILTERTHRESHC
+
+missingText <- ""
+
+if(any(colExclude))
+ {
+ missingText <- paste(missingText,
+ ifelse(sum(colExclude) > 1,
+ " There were ",
+ " There was "),
+ sum(colExclude),
+ ifelse(sum(colExclude) > 1,
+ " predictors ",
+ " predictor "),
+ "with an excessive number of ",
+                         "missing values. ",
+ ifelse(sum(colExclude) > 1,
+ " These were excluded. ",
+ " This was excluded. "))
+ predictorNames <- predictorNames[!colExclude]
+ rawData <- rawData[, names(rawData) %in% c("outcome", predictorNames), drop = FALSE]
+ }
+
+
+rowRate <- apply(rawData[, predictorNames, drop = FALSE],
+ 1, function(x) mean(is.na(x)))
+
+rowExclude <- rowRate > $MISSINGFILTERTHRESHR
+
+
+if(any(rowExclude)) {
+ missingText <- paste(missingText,
+ ifelse(sum(rowExclude) > 1,
+ " There were ",
+ " There was "),
+                         sum(rowExclude),
+                         ifelse(sum(rowExclude) > 1,
+                                " samples ",
+                                " sample "),
+                         "with an excessive number of ",
+                         "missing values. ",
+ ifelse(sum(rowExclude) > 1,
+ " These were excluded. ",
+ " This was excluded. "),
+ "After filtering, ",
+ sum(!rowExclude),
+ " samples remained.")
+ rawData <- rawData[!rowExclude, ]
+ hasMissing <- apply(rawData[, predictorNames, drop = FALSE],
+ 1, function(x) mean(is.na(x)))
+ } else {
+ hasMissing <- apply(rawData[, predictorNames, drop = FALSE],
+ 1, function(x) any(is.na(x)))
+ missingText <- paste(missingText,
+ ifelse(missingText == "",
+ "There ",
+ "Subsequently, there "),
+ ifelse(sum(hasMissing) == 1,
+ "was ",
+ "were "),
+ ifelse(sum(hasMissing) > 0,
+ sum(hasMissing),
+ "no"),
+ ifelse(sum(hasMissing) == 1,
+ "sample ",
+ "samples "),
+ "with missing values.")
+
+ rawData <- rawData[complete.cases(rawData),]
+
+ }
+
+rawData1 <- rawData[, 1:(length(rawData)-1)]
+rawData2 <- rawData[, length(rawData)]
+
+set.seed(222)
+nzv1 <- nearZeroVar(rawData1)
+ if(length(nzv1) > 0)
+ {
+ nzvVars1 <- names(rawData1)[nzv1]
+ rawData <- rawData1[, -nzv1]
+    rawData$$outcome <- rawData2
+ nzvText1 <- paste("There were ",
+ length(nzv1),
+                      " predictors that were removed from the original data due to",
+ " severely unbalanced distributions that",
+ " could negatively affect the model fit",
+ ifelse(length(nzv1) > 10,
+ ".",
+ paste(": ",
+ listString(nzvVars1),
+ ".",
+ sep = "")),
+ sep = "")
+
+ } else {
+rawData <- rawData1
+rawData$$outcome <- rawData2
+nzvText1 <- ""
+
+}
+
+remove("rawData1")
+remove("rawData2")
+
+@
+
+The initial data set consisted of \Sexpr{numSamples} samples and
+\Sexpr{numPredictors} predictor variables. The breakdown of the
+outcome data classes were: \Sexpr{classDistString}.
+
+ \Sexpr{missingText}
+
+ \Sexpr{nzvText1}
+
+<>=
+
+predictorNames <- names(rawData)[names(rawData) != "outcome"]
+numPredictors <- length(predictorNames)
+predictors <- rawData[, predictorNames, drop = FALSE]
+## PCA will fail with predictors having less than 2 unique values
+isZeroVar <- apply(predictors, 2,
+ function(x) length(unique(x)) < 2)
+if(any(isZeroVar)) predictors <- predictors[, !isZeroVar, drop = FALSE]
+## For whatever reason, only the formula interface to prcomp
+## handles missing values
+pcaForm <- as.formula(
+ paste("~",
+ paste(names(predictors), collapse = "+")))
+pca <- prcomp(pcaForm,
+ data = predictors,
+ center = TRUE,
+ scale. = TRUE,
+ na.action = na.omit)
+## OPTION: the number of components plotted/discussed can be set
+numPCAcomp <- $PCACOMP
+pctVar <- pca$$sdev^2/sum(pca$$sdev^2)*100
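+## pctVar: the percentage of total variance captured by each principal
+## component (squared standard deviations over their sum, times 100)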
+pcaText <- paste(round(pctVar[1:numPCAcomp], 1),
+ "\\\\%",
+ sep = "")
+pcaText <- listString(pcaText)
+@
+
+ To get an initial assessment of the separability of the classes,
+ principal component analysis (PCA) was used to distill the
+ \Sexpr{numPredictors} predictors down into \Sexpr{numPCAcomp}
+ surrogate variables (i.e. the principal components) in a manner that
+ attempts to maximize the amount of information preserved from the
+ original predictor set. Figure \ref{F:inititalPCA} contains plots of
+ the first \Sexpr{numPCAcomp} components, which accounted for
+ \Sexpr{pcaText} percent of the variability in the original predictors
+ (respectively).
+
+
+%% OPTION: remark on how well (or poorly) the data separated
+
+ \setkeys{Gin}{width = 0.8\textwidth}
+ \begin{figure}[p]
+ \begin{center}
+
+<>=
+trellis.par.set(caretTheme(), warn = TRUE)
+if(numPCAcomp == 2)
+ {
+ axisRange <- extendrange(pca$$x[, 1:2])
+ print(
+ xyplot(PC1 ~ PC2,
+ data = as.data.frame(pca$$x),
+ type = c("p", "g"),
+ groups = rawData$$outcome,
+ auto.key = list(columns = 2),
+ xlim = axisRange,
+ ylim = axisRange))
+ } else {
+ axisRange <- extendrange(pca$$x[, 1:numPCAcomp])
+ print(
+ splom(~as.data.frame(pca$$x)[, 1:numPCAcomp],
+ type = c("p", "g"),
+ groups = rawData$$outcome,
+ auto.key = list(columns = 2),
+ as.table = TRUE,
+ prepanel.limits = function(x) axisRange
+ ))
+
+ }
+
+@
+
+ \caption[PCA Plot]{A plot of the first \Sexpr{numPCAcomp}
+ principal components for the original data set.}
+ \label{F:inititalPCA}
+ \end{center}
+ \end{figure}
+
+
+
+<>=
+
+ ## OPTION: in small samples sizes, you may not want to set aside a
+ ## training set and focus on the resampling results.
+
+set.seed(1234)
+dataX <- rawData[, 1:(length(rawData)-1)]
+dataY <- rawData[, length(rawData)]
+
+ Smpling <- "$SAAMPLING"
+
+if(Smpling=="downsampling")
+{
+dwnsmpl <- downSample(dataX,dataY)
+rawData <- dwnsmpl[, 1:(length(dwnsmpl)-1)]
+rawData$$outcome <- dwnsmpl[, length(dwnsmpl)]
+remove("dwnsmpl")
+remove("dataX")
+remove("dataY")
+}else if(Smpling=="upsampling"){
+upsmpl <- upSample(dataX,dataY)
+rawData <- upsmpl[, 1:(length(upsmpl)-1)]
+rawData$$outcome <- upsmpl[, length(upsmpl)]
+remove("upsmpl")
+remove("dataX")
+remove("dataY")
+}else{remove("dataX")
+remove("dataY")
+}
+
+
+
+numSamples <- nrow(rawData)
+
+predictorNames <- names(rawData)[names(rawData) != "outcome"]
+numPredictors <- length(predictorNames)
+
+
+classDist1 <- table(rawData$$outcome)
+classDistString1 <- paste("``",
+                          names(classDist1),
+                          "'' ($$n$$=",
+ classDist1,
+ ")",
+ sep = "")
+classDistString1 <- listString(classDistString1)
+
+ pctTrain <- $PERCENT
+
+if(pctTrain < 1)
+ {
+ ## OPTION: seed number can be changed
+ set.seed(1)
+ inTrain <- createDataPartition(rawData$$outcome,
+ p = pctTrain,
+ list = FALSE)
+ trainX <- rawData[ inTrain, predictorNames]
+ testX <- rawData[-inTrain, predictorNames]
+ trainY <- rawData[ inTrain, "outcome"]
+ testY <- rawData[-inTrain, "outcome"]
+ splitText <- paste("The original data were split into ",
+ "a training set ($$n$$=",
+ nrow(trainX),
+ ") and a test set ($$n$$=",
+ nrow(testX),
+ ") in a manner that preserved the ",
+ "distribution of the classes.",
+ sep = "")
+ isZeroVar <- apply(trainX, 2,
+ function(x) length(unique(x)) < 2)
+ if(any(isZeroVar))
+ {
+ trainX <- trainX[, !isZeroVar, drop = FALSE]
+ testX <- testX[, !isZeroVar, drop = FALSE]
+ }
+
+ } else {
+ trainX <- rawData[, predictorNames]
+ testX <- NULL
+ trainY <- rawData[, "outcome"]
+ testY <- NULL
+ splitText <- "The entire data set was used as the training set."
+ }
+trainDist <- table(trainY)
+nir <- max(trainDist)/length(trainY)*100
+niClass <- names(trainDist)[which.max(trainDist)]
+nirText <- paste("The non--information rate is the accuracy that can be ",
+ "achieved by predicting all samples using the most ",
+ "dominant class. For these data, the rate is ",
+ round(nir, 2), "\\\\% using the ``",
+ niClass,
+ "'' class.",
+ sep = "")
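+## e.g. a training set with 60 "Active" and 40 "Inactive" samples would give a
+## non-information rate of 60% using the "Active" class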
+
+remove("rawData")
+
+@
+
+ \Sexpr{splitText}
+
+ \Sexpr{nirText}
+
+The data set for model building consisted of \Sexpr{numSamples} samples and
+\Sexpr{numPredictors} predictor variables. The breakdown of the
+outcome data classes were: \Sexpr{classDistString1}.
+
+<>=
+## OPTION: other pre-processing steps can be used
+ppSteps <- caret:::suggestions(modName)
+
+set.seed(2)
+if(ppSteps["nzv"])
+ {
+ nzv <- nearZeroVar(trainX)
+ if(length(nzv) > 0)
+ {
+ nzvVars <- names(trainX)[nzv]
+ trainX <- trainX[, -nzv]
+ nzvText <- paste("There were ",
+ length(nzv),
+                     " predictors that were removed from the training set due to",
+ " severely unbalanced distributions that",
+ " could negatively affect the model",
+ ifelse(length(nzv) > 10,
+ ".",
+ paste(": ",
+ listString(nzvVars),
+ ".",
+ sep = "")),
+ sep = "")
+ testX <- testX[, -nzv]
+ } else nzvText <- ""
+ } else nzvText <- ""
+@
+
+\Sexpr{nzvText}
+
+
+<>=
+if(ppSteps["corr"])
+ {
+ ## OPTION:
+ corrThresh <- $THRESHHOLDCOR
+ highCorr <- findCorrelation(cor(trainX, use = "pairwise.complete.obs"),
+ corrThresh)
+ if(length(highCorr) > 0)
+ {
+ corrVars <- names(trainX)[highCorr]
+ trainX <- trainX[, -highCorr]
+ corrText <- paste("There were ",
+ length(highCorr),
+ " predictors that were removed due to",
+ " large between--predictor correlations that",
+ " could negatively affect the model fit",
+ ifelse(length(highCorr) > 10,
+ ".",
+ paste(": ",
+                                listString(corrVars),
+ ".",
+ sep = "")),
+ " Removing these predictors forced",
+ " all pair--wise correlations to be",
+ " less than ",
+ corrThresh,
+ ".",
+ sep = "")
+ testX <- testX[, -highCorr]
+    } else corrText <- "No predictor pairs exceeded the given correlation threshold."
+ }else corrText <- ""
+@
+
+ \Sexpr{corrText}
+
+<>=
+ppMethods <- NULL
+if(ppSteps["center"]) ppMethods <- c(ppMethods, "center")
+if(ppSteps["scale"]) ppMethods <- c(ppMethods, "scale")
+if(any(hasMissing) > 0) ppMethods <- c(ppMethods, "knnImpute")
+##OPTION other methods, such as spatial sign, can be added to this list
+
+if(length(ppMethods) > 0)
+ {
+ ppInfo <- preProcess(trainX, method = ppMethods)
+ trainX <- predict(ppInfo, trainX)
+ if(pctTrain < 1) testX <- predict(ppInfo, testX)
+ ppText <- paste("The following pre--processing methods were",
+ " applied to the training",
+ ifelse(pctTrain < 1, " and test", ""),
+ " data: ",
+ listString(ppMethods),
+ ".",
+ sep = "")
+ ppText <- gsub("center", "mean centering", ppText)
+ ppText <- gsub("scale", "scaling to unit variance", ppText)
+ ppText <- gsub("knnImpute",
+ paste(ppInfo$$k, "--nearest neighbor imputation", sep = ""),
+ ppText)
+ ppText <- gsub("spatialSign", "the spatial sign transformation", ppText)
+ ppText <- gsub("pca", "principal component feature extraction", ppText)
+ ppText <- gsub("ica", "independent component feature extraction", ppText)
+ } else {
+ ppInfo <- NULL
+ ppText <- ""
+ }
+
+predictorNames <- names(trainX)
+if(nzvText != "" | corrText != "" | ppText != "")
+ {
+ varText <- paste("After pre--processing, ",
+ ncol(trainX),
+ "predictors remained for modeling.")
+ } else varText <- ""
+
+@
+
+ \Sexpr{ppText}
+ \Sexpr{varText}
+
+\clearpage
+
+\section*{Model Building}
+
+<>=
+numWorkers <- $NUMWORKERS
+##OPTION: turn up numWorkers to use MPI
+if(numWorkers > 1)
+ {
+ mpiCalcs <- function(X, FUN, ...)
+ {
+ theDots <- list(...)
+ parLapply(theDots$$cl, X, FUN)
+ }
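+    ## mpiCalcs forwards the list of resampling tasks to snow's parLapply on the
+    ## MPI cluster created below; caret calls it via ctlObj$$computeFunction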
+
+ library(snow)
+ cl <- makeCluster(numWorkers, "MPI")
+ }
+@
+
+<>=
+##OPTION: the resampling options can be changed. See
+## ?trainControl for details
+
+resampName <- "$RESAMPNAME"
+resampNumber <- $RESAMPLENUMBER
+numRepeat <- $NUMREPEAT
+resampP <- $RESAMPLENUMBERPERCENT
+
+modelInfo <- modelLookup(modName)
+
+if(numClasses == 2)
+ {
+ foo <- if(any(modelInfo$$probModel)) twoClassSummary else twoClassNoProbs
+ } else foo <- defaultSummary
+
+set.seed(3)
+ctlObj <- trainControl(method = resampName,
+ number = resampNumber,
+ repeats = numRepeat,
+ p = resampP,
+ classProbs = any(modelInfo$$probModel),
+ summaryFunction = foo)
+
+
+##OPTION select other performance metrics as needed
+optMetric <- if(numClasses == 2 & any(modelInfo$$probModel)) "ROC" else "Kappa"
+
+if(numWorkers > 1)
+ {
+ ctlObj$$workers <- numWorkers
+ ctlObj$$computeFunction <- mpiCalcs
+ ctlObj$$computeArgs <- list(cl = cl)
+ }
+@
+
+<>=
+##OPTION expand or contract these grids as needed (or
+## add more models
+
+gridSize <- $SETUPGRIDSIZE
+
+if(modName %in% c("svmPoly", "svmRadial", "svmLinear", "lvq", "ctree2", "ctree")) gridSize <- 5
+if(modName %in% c("earth", "fda")) gridSize <- 7
+if(modName %in% c("knn", "rocc", "glmboost", "rf", "nodeHarvest")) gridSize <- 10
+
+if(modName %in% c("nb")) gridSize <- 2
+if(modName %in% c("pam", "rpart")) gridSize <- 15
+if(modName %in% c("pls")) gridSize <- min(20, ncol(trainX))
+
+if(modName == "gbm")
+ {
+ tGrid <- expand.grid(.interaction.depth = -1 + (1:5)*2 ,
+ .n.trees = (1:10)*20,
+ .shrinkage = .1)
+ }
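+## the gbm grid above covers interaction depths 1, 3, 5, 7 and 9 and 20-200
+## trees in steps of 20 (50 candidate combinations) at a fixed shrinkage of 0.1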
+
+if(modName == "nnet")
+ {
+ tGrid <- expand.grid(.size = -1 + (1:5)*2 ,
+ .decay = c(0, .001, .01, .1))
+ }
+
+if(modName == "ada")
+ {
+ tGrid <- expand.grid(.maxdepth = 1, .iter = c(100,200,300,400), .nu = 1 )
+
+ }
+
+
+@
+
+<>=
+##OPTION alter as needed
+
+set.seed(4)
+modelFit <- switch(modName,
+ gbm =
+ {
+ mix <- sample(seq(along = trainY))
+ train(
+ trainX[mix,], trainY[mix], modName,
+ verbose = FALSE,
+ bag.fraction = .9,
+ metric = optMetric,
+ trControl = ctlObj,
+ tuneGrid = tGrid)
+ },
+
+ multinom =
+ {
+ train(
+ trainX, trainY, modName,
+ trace = FALSE,
+ metric = optMetric,
+ maxiter = 1000,
+ MaxNWts = 5000,
+ trControl = ctlObj,
+ tuneLength = gridSize)
+ },
+
+ nnet =
+ {
+ train(
+ trainX, trainY, modName,
+ metric = optMetric,
+ linout = FALSE,
+ trace = FALSE,
+ maxiter = 1000,
+ MaxNWts = 5000,
+ trControl = ctlObj,
+ tuneGrid = tGrid)
+
+ },
+
+ svmRadial =, svmPoly =, svmLinear =
+ {
+ train(
+ trainX, trainY, modName,
+ metric = optMetric,
+ scaled = TRUE,
+ trControl = ctlObj,
+ tuneLength = gridSize)
+ },
+ {
+ train(trainX, trainY, modName,
+ trControl = ctlObj,
+ metric = optMetric,
+ tuneLength = gridSize)
+ })
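+## the unnamed final branch of the switch is the default: any other caret model
+## is tuned over tuneLength = gridSize with the shared trainControl object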
+
+@
+
+<>=
+summaryText <- ""
+
+resampleName <- switch(tolower(modelFit$$control$$method),
+ boot = paste("the bootstrap (", length(modelFit$$control$$index), " reps)", sep = ""),
+ boot632 = paste("the bootstrap 632 rule (", length(modelFit$$control$$index), " reps)", sep = ""),
+ cv = paste("cross-validation (", modelFit$$control$$number, " fold)", sep = ""),
+ repeatedcv = paste("cross-validation (", modelFit$$control$$number, " fold, repeated ",
+ modelFit$$control$$repeats, " times)", sep = ""),
+                        lgocv = paste("repeated train/test splits (", length(modelFit$$control$$index), " reps, ",
+                                      round(modelFit$$control$$p, 2), "$$\\%$$)", sep = ""),
+                        loocv = "leave-one-out cross-validation")
+
+tuneVars <- latexTranslate(tolower(modelInfo$$label))
+tuneVars <- gsub("\\#", "the number of ", tuneVars, fixed = TRUE)
+if(ncol(modelFit$$bestTune) == 1 && colnames(modelFit$$bestTune) == ".parameter")
+ {
+ summaryText <- paste(summaryText,
+ "\n\n",
+ "There are no tuning parameters associated with this model.",
+ "To characterize the model performance on the training set,",
+ resampleName,
+ "was used.",
+ "Table \\\\ref{T:resamps} and Figure \\\\ref{F:profile}",
+ "show summaries of the resampling results. ")
+
+ } else {
+ summaryText <- paste("There",
+ ifelse(nrow(modelInfo) > 1, "are", "is"),
+ nrow(modelInfo),
+ ifelse(nrow(modelInfo) > 1, "tuning parameters", "tuning parameter"),
+ "associated with this model:",
+ listString(tuneVars, period = TRUE))
+
+
+
+ paramNames <- gsub(".", "", names(modelFit$$bestTune), fixed = TRUE)
+ for(i in seq(along = paramNames))
+ {
+ check <- modelInfo$$parameter %in% paramNames[i]
+ if(any(check))
+ {
+ paramNames[i] <- modelInfo$$label[which(check)]
+ }
+ }
+
+ paramNames <- gsub("#", "the number of ", paramNames, fixed = TRUE)
+ ## Check to see if there was only one combination fit
+ summaryText <- paste(summaryText,
+ "To choose",
+ ifelse(nrow(modelInfo) > 1,
+ "appropriate values of the tuning parameters,",
+ "an appropriate value of the tuning parameter,"),
+ resampleName,
+                         "was used to generate a profile of performance across the",
+ nrow(modelFit$$results),
+ ifelse(nrow(modelInfo) > 1,
+ "combinations of the tuning parameters.",
+ "candidate values."),
+
+ "Table \\\\ref{T:resamps} and Figure \\\\ref{F:profile} show",
+ "summaries of the resampling profile. ", "The final model fitted to the entire training set was:",
+ listString(paste(latexTranslate(tolower(paramNames)), "=", modelFit$$bestTune[1,]), period = TRUE))
+
+ }
+@
+
+\Sexpr{summaryText}
+
+<>=
+tableData <- modelFit$$results
+
+if(all(modelInfo$$parameter == "parameter") && resampName == "boot632")
+ {
+ tableData <- tableData[,-1, drop = FALSE]
+    colNums <- c(length(modelFit$$perfNames), length(modelFit$$perfNames), length(modelFit$$perfNames))
+    colLabels <- c("Mean", "Standard Deviation", "Apparent")
+    constString <- ""
+    isConst <- NULL
+  }else if (all(modelInfo$$parameter == "parameter") && (resampName == "boot" | resampName == "cv" | resampName == "repeatedcv" )){
+    tableData <- tableData[,-1, drop = FALSE]
+    colNums <- c(length(modelFit$$perfNames), length(modelFit$$perfNames))
+    colLabels <- c("Mean", "Standard Deviation")
+    constString <- ""
+    isConst <- NULL
+  }else if (all(modelInfo$$parameter == "parameter") && resampName == "LOOCV" ){
+    tableData <- tableData[,-1, drop = FALSE]
+    colNums <- length(modelFit$$perfNames)
+    colLabels <- c("Measures")
+    constString <- ""
+    isConst <- NULL
+  }else if (all(modelInfo$$parameter != "parameter") && (resampName == "boot" | resampName == "cv" | resampName == "repeatedcv" )){
+ isConst <- apply(tableData[, modelInfo$$parameter, drop = FALSE],
+ 2,
+ function(x) length(unique(x)) == 1)
+
+ numParamInTable <- sum(!isConst)
+
+ if(any(isConst))
+ {
+ constParam <- modelInfo$$parameter[isConst]
+ constValues <- format(tableData[, constParam, drop = FALSE], digits = 4)[1,,drop = FALSE]
+ tableData <- tableData[, !(names(tableData) %in% constParam), drop = FALSE]
+     constString <- paste("The tuning",
+                          ifelse(sum(isConst) > 1,
+                                 "parameters",
+                                 "parameter"),
+                          listString(paste("``", names(constValues), "''", sep = "")),
+                          ifelse(sum(isConst) > 1,
+                                 "were",
+                                 "was"),
+                          "held constant at",
+                          ifelse(sum(isConst) > 1,
+                                 "values of",
+                                 "a value of"),
+                          listString(constValues[1,]))
+
+ } else constString <- ""
+
+ cn <- colnames(tableData)
+ for(i in seq(along = cn))
+ {
+ check <- modelInfo$$parameter %in% cn[i]
+ if(any(check))
+ {
+ cn[i] <- modelInfo$$label[which(check)]
+ }
+ }
+ colnames(tableData) <- cn
+
+ colNums <- c(numParamInTable,
+               length(modelFit$$perfNames),
+               length(modelFit$$perfNames))
+  colLabels <- c("", "Mean", "Standard Deviation")
+
+ } else if (all(modelInfo$$parameter != "parameter") && (resampName == "LOOCV" )){
+ isConst <- apply(tableData[, modelInfo$$parameter, drop = FALSE],
+ 2,
+ function(x) length(unique(x)) == 1)
+
+ numParamInTable <- sum(!isConst)
+
+ if(any(isConst))
+ {
+ constParam <- modelInfo$$parameter[isConst]
+ constValues <- format(tableData[, constParam, drop = FALSE], digits = 4)[1,,drop = FALSE]
+ tableData <- tableData[, !(names(tableData) %in% constParam), drop = FALSE]
+     constString <- paste("The tuning",
+                          ifelse(sum(isConst) > 1,
+                                 "parameters",
+                                 "parameter"),
+                          listString(paste("``", names(constValues), "''", sep = "")),
+                          ifelse(sum(isConst) > 1,
+                                 "were",
+                                 "was"),
+                          "held constant at",
+                          ifelse(sum(isConst) > 1,
+                                 "values of",
+                                 "a value of"),
+                          listString(constValues[1,]))
+
+ } else constString <- ""
+
+ cn <- colnames(tableData)
+ for(i in seq(along = cn))
+ {
+ check <- modelInfo$$parameter %in% cn[i]
+ if(any(check))
+ {
+ cn[i] <- modelInfo$$label[which(check)]
+ }
+ }
+ colnames(tableData) <- cn
+
+colNums <- c(numParamInTable,
+             length(modelFit$$perfNames))
+ colLabels <- c("", "Measures" )
+
+
+} else if (all(modelInfo$$parameter != "parameter") && (resampName == "boot632" )) {
+ isConst <- apply(tableData[, modelInfo$$parameter, drop = FALSE],
+ 2,
+ function(x) length(unique(x)) == 1)
+
+ numParamInTable <- sum(!isConst)
+
+ if(any(isConst))
+ {
+ constParam <- modelInfo$$parameter[isConst]
+ constValues <- format(tableData[, constParam, drop = FALSE], digits = 4)[1,,drop = FALSE]
+ tableData <- tableData[, !(names(tableData) %in% constParam), drop = FALSE]
+     constString <- paste("The tuning",
+                          ifelse(sum(isConst) > 1,
+                                 "parameters",
+                                 "parameter"),
+                          listString(paste("``", names(constValues), "''", sep = "")),
+                          ifelse(sum(isConst) > 1,
+                                 "were",
+                                 "was"),
+                          "held constant at",
+                          ifelse(sum(isConst) > 1,
+                                 "values of",
+                                 "a value of"),
+                          listString(constValues[1,]))
+
+ } else constString <- ""
+
+ cn <- colnames(tableData)
+ for(i in seq(along = cn))
+ {
+ check <- modelInfo$$parameter %in% cn[i]
+ if(any(check))
+ {
+ cn[i] <- modelInfo$$label[which(check)]
+ }
+ }
+ colnames(tableData) <- cn
+
+ colNums <- c(numParamInTable,
+ length(modelFit$$perfNames),
+ length(modelFit$$perfNames),
+ length(modelFit$$perfNames))
+    colLabels <- c("", "Mean", "Standard Deviation", "Apparent")
+} else {
+    constString <- "The combination of tuning parameters and resampling method was not recognized."
+ }
+
+
+
+
+colnames(tableData) <- gsub("SD$$", "", colnames(tableData))
+colnames(tableData) <- gsub("Apparent$$", "", colnames(tableData))
+colnames(tableData) <- latexTranslate(colnames(tableData))
+rownames(tableData) <- latexTranslate(rownames(tableData))
+
+latex(tableData,
+ rowname = NULL,
+ file = "",
+ cgroup = colLabels,
+ n.cgroup = colNums,
+ where = "h!",
+ digits = 4,
+ longtable = nrow(tableData) > 30,
+ caption = paste(resampleName, "results from the model fit.", constString),
+ label = "T:resamps")
+@
+
+ \setkeys{Gin}{ width = 0.9\textwidth}
+ \begin{figure}[b]
+ \begin{center}
+
+<>=
+ trellis.par.set(caretTheme(), warn = TRUE)
+if(all(modelInfo$$parameter == "parameter") | all(isConst) | modName == "nb")
+ {
+ resultsPlot <- resampleHist(modelFit)
+ plotCaption <- paste("Distributions of model performance from the ",
+ "training set estimated using ",
+ resampleName)
+ } else {
+ if(modName %in% c("svmPoly", "svmRadial", "svmLinear"))
+ {
+ resultsPlot <- plot(modelFit,
+ metric = optMetric,
+ xTrans = function(x) log10(x))
+ resultsPlot <- update(resultsPlot,
+ type = c("g", "p", "l"),
+ ylab = paste(optMetric, " (", resampleName, ")", sep = ""))
+
+ } else {
+ resultsPlot <- plot(modelFit,
+ metric = optMetric)
+ resultsPlot <- update(resultsPlot,
+ type = c("g", "p", "l"),
+ ylab = paste(optMetric, " (", resampleName, ")", sep = ""))
+ }
+ plotCaption <- paste("A plot of the estimates of the",
+ optMetric,
+ "values calculated using",
+ resampleName)
+ }
+print(resultsPlot)
+@
+ \caption[Performance Plot]{\Sexpr{plotCaption}.}
+ \label{F:profile}
+ \end{center}
+ \end{figure}
+
+
+<>=
+if(numWorkers > 1) stopCluster(cl)
+@
+
+<>=
+ if(pctTrain < 1)
+ {
+ cat("\\clearpage\n\\section*{Test Set Results}\n\n")
+ classPred <- predict(modelFit, testX)
+ cm <- confusionMatrix(classPred, testY)
+ values <- cm$$overall[c("Accuracy", "Kappa", "AccuracyPValue", "McnemarPValue")]
+
+ values <- values[!is.na(values) & !is.nan(values)]
+ values <- c(format(values[1:2], digits = 3),
+ format.pval(values[-(1:2)], digits = 5))
+ nms <- c("the overall accuracy", "the Kappa statistic",
+ "the $$p$$--value that accuracy is greater than the no--information rate",
+ "the $$p$$--value of concordance from McNemar's test")
+ nms <- nms[seq(along = values)]
+ names(values) <- nms
+
+ if(any(modelInfo$$probModel))
+ {
+ classProbs <- extractProb(list(fit = modelFit),
+ testX = testX,
+ testY = testY)
+ classProbs <- subset(classProbs, dataType == "Test")
+ if(numClasses == 2)
+ {
+ tmp <- twoClassSummary(classProbs, lev = levels(classProbs$$obs))
+ tmp <- c(format(tmp, digits = 3))
+ names(tmp) <- c("the sensitivity", "the specificity",
+ "the area under the ROC curve")
+ values <- c(values, tmp)
+
+ }
+ probPlot <- plotClassProbs(classProbs)
+ }
+ testString <- paste("Based on the test set of",
+ nrow(testX),
+ "samples,",
+ listString(paste(names(values), "was", values), period = TRUE),
+ "The confusion matrix for the test set is shown in Table",
+ "\\\\ref{T:cm}.")
+ testString <- paste(testString,
+ " Using ", resampleName,
+ ", the training set estimates were ",
+ resampleStats(modelFit),
+ ".",
+ sep = "")
+
+ if(any(modelInfo$$probModel)) testString <- paste(testString,
+ "Histograms of the class probabilities",
+ "for the test set samples are shown in",
+ "Figure \\\\ref{F:probs}",
+ ifelse(numClasses == 2,
+ " and the test set ROC curve is in Figure \\\\ref{F:roc}.",
+ "."))
+
+
+
+ latex(cm$$table,
+ title = "",
+ file = "",
+ where = "h",
+ cgroup = "Observed Values",
+ n.cgroup = numClasses,
+ caption = "The confusion matrix for the test set",
+ label = "T:cm")
+
+ } else testString <- ""
+@
+\Sexpr{testString}
+
+
+<>=
+ if(any(modelInfo$$probModel))
+ {
+ cat(
+ paste("\\begin{figure}[p]\n",
+ "\\begin{center}\n",
+ "\\includegraphics{classProbs}",
+ "\\caption[PCA Plot]{Class probabilities",
+ "for the test set. Each panel contains ",
+ "separate classes}\n",
+ "\\label{F:probs}\n",
+ "\\end{center}\n",
+ "\\end{figure}"))
+ }
+ if(any(modelInfo$$probModel) & numClasses == 2)
+ {
+ cat(
+ paste("\\begin{figure}[p]\n",
+ "\\begin{center}\n",
+ "\\includegraphics[clip, width = .8\\textwidth]{roc}",
+ "\\caption[ROC Plot]{ROC Curve",
+ "for the test set.}\n",
+ "\\label{F:roc}\n",
+ "\\end{center}\n",
+ "\\end{figure}"))
+ }
+@
+<>=
+ if(any(modelInfo$$probModel))
+ {
+ pdf("classProbs.pdf", height = 7, width = 7)
+ trellis.par.set(caretTheme(), warn = FALSE)
+ print(probPlot)
+ dev.off()
+ }
+
+ if(any(modelInfo$$probModel) & numClasses == 2)
+ { resPonse<-testY
+ preDictor<-classProbs[, levels(trainY)[1]]
+ pdf("roc.pdf", height = 8, width = 8)
+# from pROC example at http://web.expasy.org/pROC/screenshots.htm
+ plot.roc(resPonse, preDictor, # data
+ percent=TRUE, # show all values in percent
+ partial.auc=c(100, 90), partial.auc.correct=TRUE, # define a partial AUC (pAUC)
+ print.auc=TRUE, #display pAUC value on the plot with following options:
+ print.auc.pattern="Corrected pAUC (100-90%% SP):\n%.1f%%", print.auc.col="#1c61b6",
+ auc.polygon=TRUE, auc.polygon.col="#1c61b6", # show pAUC as a polygon
+ max.auc.polygon=TRUE, max.auc.polygon.col="#1c61b622", # also show the 100% polygon
+ main="Partial AUC (pAUC)")
+ plot.roc(resPonse, preDictor,
+ percent=TRUE, add=TRUE, type="n", # add to plot, but don't re-add the ROC itself (useless)
+ partial.auc=c(100, 90), partial.auc.correct=TRUE,
+ partial.auc.focus="se", # focus pAUC on the sensitivity
+ print.auc=TRUE, print.auc.pattern="Corrected pAUC (100-90%% SE):\n%.1f%%", print.auc.col="#008600",
+ print.auc.y=40, # do not print auc over the previous one
+ auc.polygon=TRUE, auc.polygon.col="#008600",
+ max.auc.polygon=TRUE, max.auc.polygon.col="#00860022")
+ dev.off()
+ }
+
+
+@
+
+\section*{Versions}
+
+<>=
+toLatex(sessionInfo())
+
+@
+
+<>=
+## change this to the name of modName....
+Fit<-modelFit
+save(Fit,file="$METHOD-Fit.RData")
+@
+The model was built using $METHOD and is saved as $METHOD-Fit.RData for reuse. This contains the variable Fit.
+
+\end{document}'''
+
+ return template4Rnw
diff -r 000000000000 -r ee374e48024f tool2/templateLibrary.pyc
Binary file tool2/templateLibrary.pyc has changed
diff -r 000000000000 -r ee374e48024f tool3/.Rhistory
diff -r 000000000000 -r ee374e48024f tool3/Preold_advance.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool3/Preold_advance.R Thu Jan 21 00:34:58 2016 -0500
@@ -0,0 +1,95 @@
+##########
+args <- commandArgs(T)
+arg1 <- args[1]
+arg2 <- args[2]
+arg3 <- args[3]
+#source("~/galaxy-dist/tools/mpdstoolsV2/tool3/Preold.R")
+#pre(arg1,arg2,arg3)
+set.seed(1)
+pre <- function(args1,args2,args3){
+#args <- commandArgs(TRUE)
+nTrain <- read.csv(args1,row.names= 1, header = T) # example nTrain.csv file of unknown activity
+#save(nTrain,file = "nTrain.RData")
+#load("nTrain.RData")
+load(args2) # model generated by the previous program (the model-building step)
+newdata <- nTrain
+modelFit <- Fit
+###########
+# The input csv file must contain exactly the same columns as were used in model building.
+# Pre-processing (centering and scaling) is also applied here.
+## S4 final models cause problems, so first check whether the given model holds an S4 object:
+## isS4(Fit$finalModel); if it is S4 (e.g. isS4(plsFit$finalModel) == TRUE),
+## handle it in an additional else-if branch.
+f=function(x){
+  x <- as.numeric(as.character(x)) # first convert the column to numeric in case it was read as a factor
+  x[is.na(x)] <- median(x, na.rm=TRUE) # replace NA entries with the column median
+  x # return the cleaned column
+}
+
+f2=function(x){
+ all(is.na(x))
+ }
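+## f() coerces a column to numeric and fills missing entries with the column
+## median; f2() flags columns in which every value is missing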
+
+
+fop <- apply(newdata,2,f2)
+allcolumnmissing <- which(fop)
+if (length(allcolumnmissing) > 0){
+newdata[,allcolumnmissing] <- 0
+newdata[,allcolumnmissing] <- newdata[,allcolumnmissing] + runif(3,0,0.00000000000000000000000000000001) ### add a tiny amount of noise
+}
+
+library(caret)
+
+#if(as.character(!isS4(Fit$finalModel == "TRUE")))
+if((Fit$method != "svmRadial") && (Fit$method != "svmLinear"))
+{
+ reqcol <- Fit$finalModel$xNames
+ newdata <- newdata[,reqcol]
+ newdata <- apply(newdata,2,f)
+  newdata <- newdata + runif(3, 0, 0.01) ### add small noise to avoid near-zero-variance errors
+ newdata1 <- preProcess(newdata, method = c("center", "scale"))
+ newdata11 <- predict(newdata1,newdata)
+###########
+ library(stats)
+ testpredict <- predict(modelFit,newdata11)
+ Label <- levels(testpredict)
+ a1 <- Label[1]
+ a2 <- Label[2]
+ probpredict <- predict(modelFit,newdata11,type="prob")
+ names <- as.data.frame(rownames(nTrain))
+ colnames(names) <- "COMPOUND"
+ activity <- as.data.frame(testpredict)
+ colnames(activity) <- "PREDICTED ACTIVITY"
+ colnames(probpredict) <- c(eval(a1),eval(a2))
+ Prob <- as.data.frame(probpredict)
+ dw <- format(cbind(names,Prob,activity),justify="centre")
+ write.table(dw,file=args3,row.names=FALSE,sep="\t")
+} else if((Fit$method == "svmRadial") | (Fit$method == "svmLinear")){
+ library(stats)
+ reqcol <- colnames(Fit$trainingData)
+  reqcol <- reqcol[1:(length(reqcol)-1)]
+ newdata <- newdata[,reqcol]
+
+ newdata <- apply(newdata,2,f)
+  newdata <- newdata + runif(3, 0, 0.01) ### add a little noise to avoid near-zero-variance problems
+ newdata1 <- preProcess(newdata, method = c("center", "scale"))
+ newdata11 <- predict(newdata1,newdata)
+ testpredict <- predict(modelFit,newdata11)
+ Label <- levels(testpredict)
+ a1 <- Label[1]
+ a2 <- Label[2]
+ probpredict <- predict(modelFit,newdata11,type="prob")
+ names <- as.data.frame(rownames(nTrain))
+ colnames(names) <- "COMPOUND"
+ activity <- as.data.frame(testpredict)
+ colnames(activity) <- "PREDICTED ACTIVITY"
+ colnames(probpredict) <- c(eval(a1),eval(a2))
+ Prob <- as.data.frame(probpredict)
+ dw <- format(cbind(names,Prob,activity),justify="centre")
+ write.table(dw,file=args3,row.names=FALSE,sep="\t")
+}else {
+  dw <- "There is something wrong with the data or the model"
+ write.csv(dw,file=args3,row.names=FALSE)
+}
+}
+pre(arg1,arg2,arg3)
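+## Example invocation when run outside Galaxy (file names are hypothetical):
+##   Rscript Preold_advance.R new_compounds.csv svmRadial-Fit.RData predictions.tsv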
diff -r 000000000000 -r ee374e48024f tool3V2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool3V2.xml Thu Jan 21 00:34:58 2016 -0500
@@ -0,0 +1,60 @@
+
+
+ used to predict activity based on a given model
+
+
+
+ R_ROOT_DIR
+ R
+ caret-tools
+
+tool3/Preold_advance.R $file1 $model $output1 2>/dev/null
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+This file **must** contain **all** of the features (it may contain more) that were present in the **input csv file** used for **model building**
+
+----------
+
+**Input "csv file" must be as follows**
+
+----------
+
+
+Example file:-
+
+
+
+# example.csv
+
+ feature1,feature2,feature3,..,featureN
+
+ro1,234,2.3,34,7,..,0.9
+
+ro2,432,3.4,23.1,12,..,0.12
+
+ro3,692,23,12.2,19,..,0.14
+
+
+-----------
+
+**MODEL**
+
+Choose the model file received from the model building step.
+
+The model file uses the generic "data" file format and can be viewed by
+
+clicking on the output files shown in the history.
+
+
+
+
diff -r 000000000000 -r ee374e48024f tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Thu Jan 21 00:34:58 2016 -0500
@@ -0,0 +1,292 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/Rcpp_0.12.1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/Matrix_1.2-2.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/RcppEigen_0.3.2.5.0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/minqa_1.2.4.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/nloptr_1.0.4.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/lme4_1.1-9.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/e1071_1.6-7.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/TeachingDemos_2.9.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/plotrix_3.5-12.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/plotmo_3.1.4.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/earth_4.4.2.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/fastICA_1.2-0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/iterators_1.0.7.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/foreach_1.4.2.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/gam_1.12.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/numDeriv_2014.2-1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/lava_1.4.1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/prodlim_1.5.1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/rpart_4.1-10.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/ipred_0.9-5.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/kernlab_0.9-22.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/combinat_0.0-8.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/klaR_0.6-12.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/ellipse_0.3-8.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/mda_0.4-7.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/mlbench_2.1-1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/lattice_0.20-33.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/zoo_1.7-12.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/sandwich_2.3-3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/strucchange_1.5-1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/modeltools_0.2-21.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/mvtnorm_1.0-3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/coin_1.0-24.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/party_1.0-18.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/partykit_1.0-3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/pls_2.4-3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/plyr_1.8.3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/pROC_1.8.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/proxy_0.4-15.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/randomForest_4.6-10.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/RANN_2.5.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/spls_2.2-1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/subselect_0.12-5.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/pamr_1.55.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/superpc_1.09.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/stringi_0.5-5.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/magrittr_1.5.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/stringr_1.0.0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/reshape2_1.4.1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/Cubist_0.0.18.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/digest_0.6.8.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/memoise_0.2.1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/crayon_1.3.1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/testthat_0.10.0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/pbkrtest_0.4-2.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/SparseM_1.7.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/MatrixModels_0.4-1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/quantreg_5.19.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/car_2.0-25.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/ISwR_2.0-7.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/iterators_1.0.7.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/nlme_3.1-122.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/profileModel_0.5-9.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/brglm_0.5-9.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/gtools_3.5.0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/BradleyTerry2_1.0-6.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/gtable_0.1.2.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/RColorBrewer_1.1-2.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/dichromat_2.0-0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/colorspace_1.2-6.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/munsell_0.4.2.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/labeling_0.3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/scales_0.3.0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/BiocGenerics_0.8.0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/graph_1.40.1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/Rgraphviz_2.6.0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/proto_0.3-10.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/ggplot2_1.0.1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/caret_6.0-52.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/abind_1.4-3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/acepack_1.3-3.3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/ada_2.0-3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/adabag_4.0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/lmtest_0.9-34.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/Formula_1.2-1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/AER_1.2-4.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/zlibbioc_1.8.0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/affyio_1.30.0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/preprocessCore_1.24.0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/Biobase_2.22.0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/BiocInstaller_1.12.1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/affy_1.40.0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/gamlss.data_4.3-0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/gamlss.dist_4.3-5.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/gamlss_4.3-6.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/AGD_0.35.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/akima_0.5-11.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/animation_2.3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/DBI_0.3.1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/RSQLite_1.0.0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/IRanges_1.20.7.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/AnnotationDbi_1.24.0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/ape_3.3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/bdsmatrix_1.3-2.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/biglm_0.9-1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/bitops_1.0-6.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/rFerns_1.1.0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/Boruta_4.0.0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/gbm_2.1.1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/bst_0.3-4.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/C50_0.1.0-24.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/ca_0.58.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/heplots_1.0-16.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/candisc_0.6-7.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/catdata_1.2.1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/cba_0.2-15.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/chron_2.3-47.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/clue_0.3-50.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/coda_0.17-1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/CompQuadForm_1.4.1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/corpcor_1.6.8.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/corrplot_0.73.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/coxme_2.2-5.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/date_1.2-34.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/deldir_0.1-9.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/manipulate_1.0.1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/BH_1.58.0-1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/DescTools_0.99.13.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/diptest_0.75-7.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/doMC_1.3.3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/snow_0.3-13.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/doSNOW_1.0.12.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/som_0.3-5.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/sp_1.2-0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/spam_1.0-1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/tensor_1.5.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/polyclip_1.3-2.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/goftest_1.0-3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/spatstat_1.40-0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/stabs_0.5-1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/dynlm_0.3-3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/effects_3.0-4.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/expm_0.99-1.1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/Fahrmeir_2015.6.25.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/fit.models_0.5-10.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/forward_1.0.3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/GAMBoost_1.2-3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/gclus_1.3.1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/gee_4.13-19.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/geepack_1.2-0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/glmmML_1.0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/glmnet_2.0-2.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/qvcalc_0.8-9.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/relimp_1.0-4.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/gnm_1.0-8.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/GPArotation_2014.11-1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/gpclib_1.5-5.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/gridBase_0.4-7.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/hexbin_1.27.0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/highlight_0.4.7.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/latticeExtra_0.6-26.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/gridExtra_2.0.0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/Hmisc_3.16-0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/HSAUR_1.3-7.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/registry_0.3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/xtable_1.7-4.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/pkgmaker_0.22.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/rngtools_1.2.4.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/doParallel_1.0.8.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/NMF_0.20.6.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/irlba_1.0.3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/igraph_1.0.1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/igraphdata_1.0.1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/ineq_0.2-13.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/inline_0.3.14.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/isa2_0.3.4.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/quadprog_1.5-5.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/kinship2_1.6.4.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/lars_1.2.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/leaps_2.9.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/limma_3.18.13.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/locfit_1.5-9.1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/logspline_2.1.8.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/lpSolve_5.6.12.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/lqa_1.0-3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/maps_2.3-11.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/mapproj_1.2-4.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/maptools_0.8-36.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/marray_1.40.0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/Matching_4.8-3.4.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/MatchIt_2.4-21.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/miscTools_0.6-16.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/maxLik_1.2-4.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/MBESS_3.3.3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/mclust_5.0.2.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/MCMCpack_1.3-3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/mice_2.22.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/microbenchmark_1.4-2.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/misc3d_0.8-4.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/mitools_2.3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/mix_1.0-9.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/mnormt_1.5-3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/mondate_0.10.01.02.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/MPV_1.38.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/TH.data_1.0-6.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/multcomp_1.4-1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/multicool_0.1-7.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/multtest_2.18.0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/MuMIn_1.15.1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/mvnormtest_0.1-9.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/nor1mix_1.2-1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/nws_1.7.0.1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/oz_1.0-20.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/pan_1.3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/PBSmapping_2.69.76.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/pcaPP_1.9-60.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/penalized_0.9-45.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/pkgKitten_0.1.3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/PKPDmodels_0.3.2.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/plm_1.4-0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/png_0.1-7.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/scatterplot3d_0.3-36.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/poLCA_1.4.1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/polspline_1.1.12.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/sfsmisc_1.0-28.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/polycor_0.7-8.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/polynom_1.3-8.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/prefmod_0.8-32.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/pscl_1.4.9.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/quantreg_5.19.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/rbenchmark_1.0.0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/Rcompression_0.93-2.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/rgenoud_5.7-12.4.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/rhdf5_2.6.0.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/rlecuyer_0.3-3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/rmeta_2.16.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/survival_2.38-3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/rms_4.3-1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/RSVGTipsDevice_1.0-4.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/Rsymphony_0.1-21.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/RUnit_0.4.29.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/systemfit_1.1-18.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/sampleSelection_1.0-2.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/shapefiles_0.7.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/slam_0.1-32.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/Sleuth2_1.0-7.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/SuppDists_1.1-9.1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/survey_3.30-3.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/tables_0.7.79.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/testit_0.4.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/timeDate_3012.100.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/tis_1.30.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/tripack_1.3-6.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/truncreg_0.2-1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/tseries_0.10-34.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/TSA_1.01.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/urca_1.2-8.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/vcd_1.4-1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/VGAM_0.9-8.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/VGAMdata_0.9-7.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/Zelig_4.2-1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/UPLOAD2/Rmpi_0.6-5.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/uplod/caTools_1.17.1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/uplod/pbapply_1.1-1.tar.gz?raw=true
+https://github.com/Deep2106/R-3.1.3caret/blob/CARET/DataMiningWORK/GIT_GCAC_CODES/GIT/uplod/caretEnsemble_1.0.0.tar.gz?raw=true
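+
+The URLs above point to the source tarballs of every R package the caret tools depend on, hosted on GitHub with "?raw=true" appended so that each link resolves to the archive itself rather than a web page. A minimal sketch of how such a list could be consumed is given below; it assumes the URLs are saved one per line in a plain-text file (the file name package_urls.txt is hypothetical and not part of this tool) and installs the tarballs in the order listed, which is expected to place dependencies before the packages that need them.
+
+# Hypothetical helper, not the tool's own installer: download each source
+# tarball from the list above and install it with install.packages().
+urls <- readLines("package_urls.txt")   # assumed file holding the URL list above
+urls <- urls[nzchar(urls)]              # drop blank lines
+for (u in urls) {
+    # strip the "?raw=true" query string so basename() yields a clean file name
+    dest <- file.path(tempdir(), basename(sub("\\?raw=true$", "", u)))
+    download.file(u, destfile = dest, mode = "wb")
+    install.packages(dest, repos = NULL, type = "source")
+}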
+
+The caret tools were installed successfully.
+