Mercurial > repos > sblanck > mpagenomics_wrappers
changeset 8:d5ed62b4d3ac draft default tip
planemo upload for repository https://github.com/sblanck/MPAgenomics4Galaxy/tree/master/mpagenomics_wrappers commit 689d0d8dc899a683ee18700ef385753559850233
author | sblanck |
---|---|
date | Tue, 12 May 2020 13:40:07 +0000 |
parents | f2d24110f65a |
children | |
files | segmentFracB.R segmentFracB.py segmentFracB.xml selection.R selection.py selection.xml selectionExtracted.R selectionExtracted.xml |
diffstat | 8 files changed, 452 insertions(+), 198 deletions(-) [+] |
line wrap: on
line diff
--- a/segmentFracB.R Fri Apr 10 13:32:59 2020 +0000 +++ b/segmentFracB.R Tue May 12 13:40:07 2020 +0000 @@ -1,14 +1,70 @@ +#!/usr/bin/env Rscript +# setup R error handling to go to stderr +options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) + +# we need that to not crash galaxy with an UTF8 error on German LC settings. +loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") + +library("optparse") + +##### Read options +option_list=list( + make_option("--chrom",type="character",default=NULL, dest="chrom"), + make_option("--input",type="character",default=NULL, dest="input"), + make_option("--output",type="character",default=NULL, dest="output"), + make_option("--new_file_path",type="character",default=NULL, dest="new_file_path"), + make_option("--settings_type",type="character",default=NULL, dest="settingsType"), + make_option("--output_graph",type="character",default=NULL, dest="outputgraph"), + make_option("--zip_figures",type="character",default=NULL, dest="zipfigures"), + make_option("--settings_tumor",type="character",default=NULL, dest="settingsTypeTumor"), + make_option("--outputlog",type="character",default=NULL, dest="outputlog"), + make_option("--log",type="character",default=NULL, dest="log"), + make_option("--userid",type="character",default=NULL, dest="userid"), + make_option("--method",type="character",default=NULL, dest="method") +); + +opt_parser = OptionParser(option_list=option_list); +opt = parse_args(opt_parser); + +if(is.null(opt$input)){ + print_help(opt_parser) + stop("input required.", call.=FALSE) +} + +#loading libraries + args<-commandArgs(TRUE) -chrom=args[1] -dataset=args[2] -output=args[3] -tmp_dir=args[4] -input=args[5] -outputfigures=type.convert(args[6]) -tumorcsv=args[7] -user=args[8] -method=args[9] +chrom=opt$chrom +datasetFile=opt$input +output=opt$output +tmp_dir=opt$new_file_path +input=opt$settingsType +outputfigures=type.convert(opt$outputgraph) 
+tumorcsv=opt$settingsTypeTumor +user=opt$userid +method=opt$method +log=opt$log +outputlog=opt$outputlog +outputgraph=opt$outputgraph +zipfigures=opt$zipfigures + +#chrom=opt$chrom +#datasetFile=opt$input +#output=opt$output +#tmp_dir=opt$new_file_path +#nbcall=as.numeric(opt$nbcall) +#settingsType=opt$settingsType +#outputfigures=type.convert(opt$outputgraph) +#snp=type.convert(opt$snp) +#tumorcsv=opt$settingsTypeTumor +#cellularity=as.numeric(opt$cellularity) +#user=opt$userid +#method=opt$method +#log=opt$log +#outputlog=opt$outputlog +#outputgraph=opt$outputgraph +#zipfigures=opt$zipfigures library(MPAgenomics) workdir=file.path(tmp_dir, "mpagenomics",user) @@ -22,22 +78,67 @@ chrom_vec <- as.numeric(chrom_vecstring) } -input_tmp <- strsplit(input,",") -input_tmp_vecstring <-unlist(input_tmp) + +if (outputlog){ + sinklog <- file(log, open = "wt") + sink(sinklog ,type = "output") + sink(sinklog, type = "message") +} + + +inputDataset=read.table(file=datasetFile,stringsAsFactors=FALSE) +dataset=inputDataset[1,2] + -input_vecstring = sub("^([^.]*).*", "\\1", input_tmp_vecstring) +library(MPAgenomics) +workdir=file.path(tmp_dir, "mpagenomics",user) +setwd(workdir) -if (dataset == input) { +if (grepl("all",tolower(chrom)) | chrom=="None") { + chrom_vec=c(1:25) +} else { + chrom_tmp <- strsplit(chrom,",") + chrom_vecstring <-unlist(chrom_tmp) + chrom_vec <- as.numeric(chrom_vecstring) +} + +fig_dir = file.path("mpagenomics", user, "figures", dataset, "segmentation","fracB") +abs_fig_dir = file.path(tmp_dir, fig_dir) + +if (outputgraph) { + if (dir.exists(abs_fig_dir)) { + system(paste0("rm -r ", abs_fig_dir)) + } +} + +if (input == 'dataset') { segcall=segFracBSignal(dataset,chromosome=chrom_vec, normalTumorArray=tumorcsv, savePlot=outputfigures, method=method) } else { + input_tmp <- strsplit(input,",") + input_tmp_vecstring <-unlist(input_tmp) + input_vecstring = sub("^([^.]*).*", "\\1", input_tmp_vecstring) segcall=segFracBSignal(dataset,chromosome=chrom_vec, 
normalTumorArray=tumorcsv, listOfFiles=input_vecstring, savePlot=outputfigures, method=method) } -sink(output) -print(format(segcall)) -sink() -#write.table(segcall,output,row.names = FALSE, quote=FALSE, sep = "\t") +write.table(segcall,output,row.names = FALSE, quote=FALSE, sep = "\t") + +if (outputgraph) { + setwd(abs_fig_dir) + files2zip <- dir(abs_fig_dir) + zip(zipfile = "figures.zip", files = files2zip) + file.rename("figures.zip",zipfigures) +} -quit() +if (outputlog){ + sink(type="output") + sink(type="message") + close(sinklog) +} + +#sink(output) +#print(format(segcall)) +#sink() + +
--- a/segmentFracB.py Fri Apr 10 13:32:59 2020 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,87 +0,0 @@ -import os -import sys -import subprocess -import zipfile -import getopt - - -def main(argv): - - try: - opts, args = getopt.getopt(argv,"hc:i:o:f:s:og:fig:t:p:l:u:m:",["chrom=","input=","output=","new_file_path=","settings_type=","output_graph=","zip_figures=","settings_tumor=","outputlog=","log=","userid=","method="]) - except getopt.GetoptError as err: - print str(err) - sys.exit(2) - for opt, arg in opts: - if opt == '-h': - print 'extractCNopts.py' - sys.exit() - elif opt in ("-c", "--chrom"): - chromosome = arg - elif opt in ("-i", "--input"): - input_file = arg - elif opt in ("-o", "--output"): - output_file = arg - elif opt in ("-f", "--new_file_path"): - tmp_dir = arg - elif opt in ("-s", "--settings_type"): - input_type = arg - elif opt in ("-og", "--output_graph"): - output_graph = arg - elif opt in ("-fig", "--zip_figures"): - zip_file = arg - elif opt in ("-t", "--settings_tumor"): - tumorcsv = arg - elif opt in ("-p", "--outputlog"): - outputlog = arg - elif opt in ("-l", "--log"): - log = arg - elif opt in ("-u", "--userid"): - user_id = arg - elif opt in ("-m", "--method"): - method = arg - - script_dir=os.path.dirname(os.path.abspath(__file__)) - - iFile=open(input_file,'r') - dataSetLine=iFile.readline() - dataset=dataSetLine.split("\t")[1] - iFile.close() - - - if input_type=="dataset": - input_type=dataset - - if (outputlog=="TRUE"): - errfile=open(log,'w') - else: - errfile=open(os.path.join(tmp_dir,"errfile.log"),'w') - - fig_dir=os.path.join("mpagenomics",user_id,"figures",dataset,"segmentation/fracB") - - abs_fig_dir=os.path.join(tmp_dir,fig_dir) - if (os.path.isdir(abs_fig_dir)) and (output_graph=="TRUE"): - old_files=os.listdir(abs_fig_dir) - for ifile in old_files: - os.remove(os.path.join(abs_fig_dir,ifile)) - - - retcode=subprocess.call(["Rscript", os.path.join(script_dir,"segmentFracB.R"), chromosome, dataset, output_file, 
tmp_dir, input_type, output_graph, tumorcsv, user_id, method], stdout = errfile, stderr = errfile) - - errfile.close() - - if (retcode == 0): - if (os.path.isdir(abs_fig_dir)) and (output_graph=="TRUE"): - - new_files=os.listdir(abs_fig_dir) - zipbuf = zipfile.ZipFile(os.path.join(abs_fig_dir,zip_file), 'w', zipfile.ZIP_DEFLATED) - for current_file in new_files: - fn = os.path.join(abs_fig_dir,current_file) - relfn=fn[len(abs_fig_dir)+len(os.sep):] - zipbuf.write(fn,relfn) - sys.exit(retcode) - else: - sys.exit(retcode) - -if __name__ == "__main__": - main(main(sys.argv[1:]))
--- a/segmentFracB.xml Fri Apr 10 13:32:59 2020 +0000 +++ b/segmentFracB.xml Tue May 12 13:40:07 2020 +0000 @@ -1,24 +1,27 @@ -<tool id="segFracB" name="Segmentation of allele B fraction " force_history_refresh="True"> +<tool id="segFracB" name="Segmentation of allele B fraction " force_history_refresh="True" version="1.0.0"> <description></description> - <command interpreter="python"> - segmentFracB.py - --chrom '$chrom' - --input '$input' - --output '$output' - --new_file_path '$__new_file_path__' - #if $settings.settingsType == "file": - --settings_type '$settings.inputs' - #end if - #if $settings.settingsType == "dataset": - --settings_type '$settings.settingsType' - #end if - --output_graph '$outputgraph' - --zip_figures '$zipfigures' - --settings_tumor '$tumorcsv' - --outputlog '$outputlog' - --log '$log' - --userid '$__user_id__' - --method '$method' + <command> + <![CDATA[ + Rscript + ${__tool_directory__}/segmentFracB.R + --chrom '$chrom' + --input '$input' + --output '$output' + --new_file_path '$__new_file_path__' + #if $settings.settingsType == "file": + --settings_type '$settings.inputs' + #end if + #if $settings.settingsType == "dataset": + --settings_type '$settings.settingsType' + #end if + --output_graph '$outputgraph' + --zip_figures '$zipfigures' + --settings_tumor '$tumorcsv' + --outputlog '$outputlog' + --log '$log' + --userid '$__user_id__' + --method '$method' + ]]> </command> <inputs> <param name="input" type="data" format="dsf" label="Dataset summary file" help="Summary text file generated by the Data normalization tool"/>
--- a/selection.R Fri Apr 10 13:32:59 2020 +0000 +++ b/selection.R Tue May 12 13:40:07 2020 +0000 @@ -1,26 +1,74 @@ -args<-commandArgs(TRUE) +#!/usr/bin/env Rscript +# setup R error handling to go to stderr +options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) + +# we need that to not crash galaxy with an UTF8 error on German LC settings. +loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") + +library("optparse") + +##### Read options +option_list=list( + make_option("--chrom",type="character",default=NULL, dest="chrom"), + make_option("--input",type="character",default=NULL, dest="input"), + make_option("--output",type="character",default=NULL, dest="output"), + make_option("--new_file_path",type="character",default=NULL, dest="new_file_path"), + make_option("--response",type="character",default=NULL, dest="response"), + make_option("--settingsType",type="character",default=NULL, dest="settingsType"), + make_option("--settingsTumor",type="character",default=NULL, dest="settingsTumor"), + make_option("--outputgraph",type="character",default=NULL, dest="outputgraph"), + make_option("--settingsSnp",type="character",default=NULL, dest="settingsSnp"), + make_option("--settingsSignal",type="character",default=NULL, dest="settingsSignal"), + make_option("--settingsLoss",type="character",default=NULL, dest="settingsLoss"), + make_option("--pdffigures",type="character",default=NULL, dest="pdffigures"), + make_option("--folds",type="character",default=NULL, dest="folds"), + make_option("--outputlog",type="character",default=NULL, dest="outputlog"), + make_option("--log",type="character",default=NULL, dest="log"), + make_option("--userId",type="character",default=NULL, dest="userid"), + make_option("--settingsPackage",type="character",default=NULL, dest="settingsPackage") +); -input=args[1] -dataResponse=args[2] -chrom=args[3] -tmp_dir=args[4] -signal=args[5] -snp=type.convert(args[6]) -settingsType=args[7] -tumor=args[8] -fold=as.integer(args[9]) -loss=args[10] -plot=type.convert(args[11])
-output=args[12] -user=args[13] -package=args[14] +opt_parser = OptionParser(option_list=option_list); +opt = parse_args(opt_parser); + +if(is.null(opt$input)){ + print_help(opt_parser) + stop("input required.", call.=FALSE) +} + +#loading libraries + + +chrom=opt$chrom +dataset=opt$input +dataResponse=opt$response +output=opt$output +tmp_dir=opt$new_file_path +signal=opt$settingsSignal +settingsType=opt$settingsType +# normal/tumor csv file, only used when settingsType is "tumor" +tumor=opt$settingsTumor +outputfigures=type.convert(opt$outputgraph) +snp=type.convert(opt$settingsSnp) +user=opt$userid +folds=as.numeric(opt$folds) +loss=opt$settingsLoss +log=opt$log +outputlog=opt$outputlog +outputgraph=opt$outputgraph +pdffigures=opt$pdffigures +package=opt$settingsPackage library(MPAgenomics) library(glmnet) library(spikeslab) library(lars) + +inputDataset=read.table(file=dataset,stringsAsFactors=FALSE) +input=inputDataset[1,2] workdir=file.path(tmp_dir, "mpagenomics",user) +print(workdir) setwd(workdir) if (grepl("all",tolower(chrom)) | chrom=="None") { @@ -31,18 +79,23 @@ chrom_vec <- as.numeric(chrom_vecstring) } - +if (outputlog){ + sinklog <- file(log, open = "wt") + sink(sinklog ,type = "output") + sink(sinklog, type = "message") +} + if (settingsType == "tumor") { if (signal=="CN") { - res=markerSelection(input,dataResponse, chromosome=chrom_vec, signal=signal, normalTumorArray=tumor, onlySNP=snp, loss=loss, plot=plot, nbFolds=fold, pkg=package) + res=markerSelection(input,dataResponse, chromosome=chrom_vec, signal=signal, normalTumorArray=tumor, onlySNP=snp, loss=loss, plot=outputfigures, nbFolds=folds, pkg=package) } else { - res=markerSelection(input,dataResponse, chromosome=chrom_vec,signal=signal,normalTumorArray=tumor, loss=loss, plot=plot, nbFolds=fold,pkg=package) + res=markerSelection(input,dataResponse, chromosome=chrom_vec,signal=signal,normalTumorArray=tumor, loss=loss, plot=outputfigures, nbFolds=folds,pkg=package) } } else { if (signal=="CN") { - res=markerSelection(input,dataResponse, chromosome=chrom_vec, signal=signal,
onlySNP=snp, loss=loss, plot=plot, nbFolds=fold,pkg=package) + res=markerSelection(input,dataResponse, chromosome=chrom_vec, signal=signal, onlySNP=snp, loss=loss, plot=outputfigures, nbFolds=folds,pkg=package) } else { - res=markerSelection(input,dataResponse, chromosome=chrom_vec, signal=signal, loss=loss, plot=plot, nbFolds=fold,pkg=package) + res=markerSelection(input,dataResponse, chromosome=chrom_vec, signal=signal, loss=loss, plot=outputfigures, nbFolds=folds,pkg=package) } } @@ -63,12 +116,22 @@ } } +if (outputgraph){ + file.rename(file.path(tmp_dir,"mpagenomics",user,"Rplots.pdf"), pdffigures) +} + +if (outputlog){ + sink(type="output") + sink(type="message") + close(sinklog) +} + if (markerSelected) { colnames(df) <- c("chr","position","index","names","coefficient") - sink(output) - print(format(df),row.names=FALSE) - sink() - #write.table(df,output,row.names = FALSE, quote = FALSE, sep = "\t") + #sink(output) + #print(format(df),row.names=FALSE) + #sink() + write.table(df,output,row.names = FALSE, quote = FALSE, sep = "\t") } else writeLines("no SNP selected", output)
--- a/selection.py Fri Apr 10 13:32:59 2020 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,38 +0,0 @@ -import os -import sys -import subprocess -import shutil - -def main(): - - input_file=sys.argv[1] - tmp_dir=sys.argv[4] - script_dir=os.path.dirname(os.path.abspath(__file__)) - plot=sys.argv[11] - pdffigures=sys.argv[13] - outputlog=sys.argv[14] - log=sys.argv[15] - user=sys.argv[16] - package=sys.argv[17] - - iFile=open(input_file,'r') - dataSetLine=iFile.readline() - dataset=dataSetLine.split("\t")[1] - iFile.close() - - if (outputlog=="TRUE"): - errfile=open(log,'w') - else: - errfile=open(os.path.join(tmp_dir,"errfile.log"),'w') - - retcode=subprocess.call(["Rscript", os.path.join(script_dir,"selection.R"), dataset, sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5], sys.argv[6], sys.argv[7], sys.argv[8], sys.argv[9], sys.argv[10], sys.argv[11], sys.argv[12],sys.argv[16],package], stdout = errfile, stderr = errfile) - - if (plot=="TRUE"): - shutil.copy(os.path.join(tmp_dir,"mpagenomics",user,"Rplots.pdf"), pdffigures) - - errfile.close() - - sys.exit(retcode) - -if __name__ == "__main__": - main()
--- a/selection.xml Fri Apr 10 13:32:59 2020 +0000 +++ b/selection.xml Tue May 12 13:40:07 2020 +0000 @@ -1,26 +1,41 @@ <tool id="selection" name="Markers selection" force_history_refresh="True" version="0.1.0"> - <command interpreter="python"> - selection.py '$input' '$response' '$chromosome' '$__new_file_path__' '$settingsSNP.signal' + <command> + <![CDATA[ + Rscript + ${__tool_directory__}/selection.R + --input '$input' + --response '$response' + --chrom '$chromosome' + --new_file_path '$__new_file_path__' + --settingsSignal '$settingsSNP.signal' #if $settingsSNP.signal == "CN": - '$settingsSNP.snp' + --settingsSnp '$settingsSNP.snp' #end if #if $settingsSNP.signal == "fracB": - 'none' + --settingsSnp 'none' #end if - '$settings.settingsType' + --settingsType '$settings.settingsType' #if $settings.settingsType == "tumor": - '$tumorcsv' + --settingsTumor '$tumorcsv' #end if #if $settings.settingsType == "standard": - 'none' + --settingsTumor 'none' #end if - '$folds' '$settingsLoss.loss' '$outputgraph' '$output' '$pdffigures' '$outputlog' '$log' '$__user_id__' + --folds '$folds' + --settingsLoss '$settingsLoss.loss' + --outputgraph '$outputgraph' + --output '$output' + --pdffigures '$pdffigures' + --outputlog '$outputlog' + --log '$log' + --userId '$__user_id__' #if $settingsLoss.loss == "linear": - '$settingsLoss.package' + --settingsPackage '$settingsLoss.package' #end if #if $settingsLoss.loss == "logistic": - 'HDPenReg' + --settingsPackage 'HDPenReg' #end if + ]]> </command> <inputs> <param name="input" type="data" format="dsf" label="Dataset summary file" help="Summary text file generated by the Data normalization tool"/> @@ -103,7 +118,7 @@ </inputs> <outputs> - <data format="txt" name="output" label="selection of ${input.name}" /> + <data format="tabular" name="output" label="selection of ${input.name}" /> <data format="pdf" name="pdffigures" label="figures of SNPs selection of ${input.name}"> <filter>outputgraph == "TRUE"</filter>
<filter>(settingsLoss['package'] != 'spikeslab')</filter>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/selectionExtracted.R Tue May 12 13:40:07 2020 +0000
@@ -0,0 +1,97 @@
+#!/usr/bin/env Rscript
+# Galaxy wrapper: penalized-regression marker selection on a previously
+# extracted signal table, via MPAgenomics::variableSelection().
+
+# setup R error handling to go to stderr
+options(show.error.messages = FALSE, error = function() {
+  cat(geterrmessage(), file = stderr())
+  q("no", 1, FALSE)
+})
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+library("optparse")
+
+##### Read options
+option_list <- list(
+  make_option("--input", type = "character", default = NULL, dest = "input"),
+  make_option("--output", type = "character", default = NULL, dest = "output"),
+  make_option("--new_file_path", type = "character", default = NULL, dest = "new_file_path"),
+  make_option("--response", type = "character", default = NULL, dest = "response"),
+  make_option("--loss", type = "character", default = NULL, dest = "loss"),
+  make_option("--folds", type = "character", default = NULL, dest = "folds"),
+  make_option("--outputlog", type = "character", default = NULL, dest = "outputlog"),
+  make_option("--log", type = "character", default = NULL, dest = "log")
+)
+
+opt_parser <- OptionParser(option_list = option_list)
+opt <- parse_args(opt_parser)
+
+if (is.null(opt$input)) {
+  print_help(opt_parser)
+  stop("input required.", call. = FALSE)
+}
+
+input <- opt$input
+response <- opt$response
+output <- opt$output
+tmp_dir <- opt$new_file_path
+nbFolds <- as.numeric(opt$folds)
+loss <- opt$loss
+log_file <- opt$log
+# --outputlog arrives as the string "TRUE"/"FALSE"; an absent or
+# unparsable value counts as FALSE instead of aborting in `if ()`.
+outputlog <- isTRUE(as.logical(opt$outputlog))
+
+library(MPAgenomics)
+workdir <- file.path(tmp_dir, "mpagenomics")
+setwd(workdir)
+
+# Optionally capture stdout and messages in the log file.
+if (outputlog) {
+  sinklog <- file(log_file, open = "wt")
+  sink(sinklog, type = "output")
+  sink(sinklog, type = "message")
+}
+
+# Signal table: annotation columns plus one column per sample.
+CN <- read.table(input, header = TRUE, check.names = FALSE)
+drops <- c("chromosome", "position", "probeName")
+# drop = FALSE keeps a data frame even when only one sample column remains.
+CNsignal <- CN[, !(names(CN) %in% drops), drop = FALSE]
+samples <- names(CNsignal)
+# Samples in rows, markers in columns.
+CNsignalMatrix <- t(data.matrix(CNsignal))
+
+resp <- read.table(response, header = TRUE, sep = ",")
+listOfFile <- as.character(resp[[1]])
+responseValue <- resp[[2]]
+
+# Row i of CNsignalMatrix is sample samples[i], so look each sample up in
+# the response file. The reverse lookup, match(listOfFile, samples), yields
+# the inverse permutation and mis-pairs rows and responses when the two
+# files list the samples in different orders.
+index <- match(samples, listOfFile)
+if (anyNA(index)) {
+  stop("no response found for sample(s): ",
+       paste(samples[is.na(index)], collapse = ", "), call. = FALSE)
+}
+responseValueOrder <- responseValue[index]
+
+result <- variableSelection(CNsignalMatrix, responseValueOrder,
+                            nbFolds = nbFolds, loss = loss, plot = TRUE)
+
+# Annotation columns of the selected markers, plus coefficient and index.
+CNsignalResult <- CN[result$markers.index, names(CN) %in% drops, drop = FALSE]
+CNsignalResult["coefficient"] <- result$coefficient
+CNsignalResult["index"] <- result$markers.index
+
+if (outputlog) {
+  sink(type = "output")
+  sink(type = "message")
+  close(sinklog)
+}
+
+write.table(CNsignalResult, output, row.names = FALSE, quote = FALSE,
+            sep = "\t")
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/selectionExtracted.xml Tue May 12 13:40:07 2020 +0000
@@ -0,0 +1,115 @@
+<tool id="markersSelection" name="Markers selection" force_history_refresh="True" version="0.1.0">
+  <description> of previously extracted signal</description>
+  <!-- Runs selectionExtracted.R on an extracted signal file and a response csv file. -->
+  <command>
+    <![CDATA[
+    Rscript
+    ${__tool_directory__}/selectionExtracted.R
+    --input '$input'
+    --response '$response'
+    --new_file_path '$__new_file_path__'
+    --folds '$folds'
+    --loss '$loss'
+    --outputlog '$outputlog'
+    --output '$output'
+    --log '$log'
+    ]]>
+  </command>
+  <inputs>
+    <param name="input" type="data" format="sef" label="Input Signal" help="see below for more information on file format"/>
+    <param name="response" type="data" format="csv" label="Data response" help="Data response csv file. See below for more information on file format" />
+    <param name="folds" type="integer" min="1" value="10" label ="Number of folds for cross validation" help="Integer between 1 and the number of files in the .cel file dataset"/>
+    <param name="loss" type="select" multiple="false" label="Response type">
+      <option value="linear">Linear</option>
+      <option value="logistic">Logistic</option>
+    </param>
+
+    <!-- NOTE(review): outputgraph is not passed to selectionExtracted.R by the command above; confirm whether it should be wired up. -->
+    <param name="outputgraph" type="select" label="Output figures">
+      <option value="TRUE">Yes</option>
+      <option value="FALSE">No</option>
+    </param>
+    <param name="outputlog" type="select" label="Output log">
+      <option value="TRUE">Yes</option>
+      <option value="FALSE">No</option>
+    </param>
+  </inputs>
+  <outputs>
+    <data format="tabular" name="output" label="selection of ${input.name}" />
+    <data format="log" name="log" label="log of selection of ${input.name}" >
+      <filter>outputlog == "TRUE"</filter>
+    </data>
+  </outputs>
+  <stdio>
+    <exit_code range="1:" level="fatal" description="See logs for more details" />
+  </stdio>
+  <help>
+**What it does**
+
+This tool selects some relevant markers according to a response using penalized regressions.
+
+Input:
+
+*A tabular text file containing 3 fixed columns and 1 column per sample:*
+
+ - chromosome: Chromosome.
+ - position: Genomic position (in bp).
+ - probeName: Names of the probes.
+ - One column per sample which contains the copy number signal for each sample.
+
+The three fixed columns must be named exactly ``chromosome``, ``position`` and ``probeName``; every other column is read as a sample.
+
+Output:
+
+*A tabular text file containing 5 columns which describe all the selected SNPs (1 line per SNP):*
+
+ - chromosome: Chromosome containing the selected SNP.
+ - position: Position of the selected SNP.
+ - probeName: Name of the selected SNP.
+ - coefficient: Regression coefficient of the selected SNP.
+ - index: Index (row number in the input file) of the selected SNP.
+
+-----
+
+**Data Response csv file**
+
+Data response csv file format:
+
+ - The first column contains the names of the different files of the dataset.
+
+ - The second column is the response associated with each file.
+
+ - Column names of these two columns are respectively files and response.
+
+ - Columns are separated by a comma
+
+ - *Extensions of the files (.CEL for example) should be removed*
+
+
+
+**Example**
+
+Suppose the studied dataset contains 3 .cel files ::
+
+    patient1.cel
+    patient2.cel
+    patient3.cel
+
+The csv file should look like this ::
+
+    files,response
+    patient1,1.92145
+    patient2,2.12481
+    patient3,1.23545
+
+
+-----
+
+**Citation**
+
+If you use this tool please cite :
+
+`Q. Grimonprez, A. Celisse, M. Cheok, M. Figeac, and G. Marot. MPAgenomics : An R package for multi-patients analysis of genomic markers, 2014. Preprint <http://fr.arxiv.org/abs/1401.5035>`_
+
+  </help>
+</tool>