Mercurial > repos > sblanck > mpagenomics_wrappers
changeset 7:f2d24110f65a draft
planemo upload for repository https://github.com/sblanck/MPAgenomics4Galaxy/tree/master/mpagenomics_wrappers commit 2bcab460cab11ab529468aa0c10354f61accc08a
author | sblanck |
---|---|
date | Fri, 10 Apr 2020 13:32:59 +0000 |
parents | 7afc2ee6cc89 |
children | d5ed62b4d3ac |
files | filter.xml markersSelection.R markersSelection.py markersSelection.xml |
diffstat | 4 files changed, 1 insertions(+), 170 deletions(-) [+] |
line wrap: on
line diff
--- a/filter.xml Thu Apr 09 15:23:57 2020 +0000 +++ b/filter.xml Fri Apr 10 13:32:59 2020 +0000 @@ -32,7 +32,7 @@ </inputs> <outputs> - <data format="txt" name="output" label="filter of ${on_string}" /> + <data format="tabular" name="output" label="filter of ${on_string}" /> <data format="log" name="log" label="log of segmentation of ${input.name}"> <filter>outputlog == "TRUE"</filter>
--- a/markersSelection.R Thu Apr 09 15:23:57 2020 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,35 +0,0 @@ -args<-commandArgs(TRUE) - -input=args[1] -response=args[2] -tmp_dir=args[3] -nbFolds=as.numeric(args[4]) -loss=args[5] -output=args[6] - -library(MPAgenomics) -workdir=file.path(tmp_dir, "mpagenomics") -setwd(workdir) - -CN=read.table(input,header=TRUE,check.names=FALSE) -drops=c("chromosome","position","probeName") -CNsignal=CN[,!(names(CN)%in% drops)] -samples=names(CNsignal) -CNsignalMatrix=t(data.matrix(CNsignal)) -resp=read.table(response,header=TRUE,sep=",") -listOfFile=resp[[1]] -responseValue=resp[[2]] -index = match(listOfFile,rownames(CNsignalMatrix)) -responseValueOrder=responseValue[index] - -result=variableSelection(CNsignalMatrix,responseValueOrder,nbFolds=nbFolds,loss=loss,plot=TRUE) - -CNsignalResult=CN[result$markers.index,(names(CN)%in% drops)] - -CNsignalResult["coefficient"]=result$coefficient -CNsignalResult["index"]=result$markers.index - -sink(output) -print(format(CNsignalResult),row.names=FALSE) -sink() -#write.table(CNsignalResult,output,row.names = FALSE, quote=FALSE, sep = "\t")
--- a/markersSelection.py Thu Apr 09 15:23:57 2020 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,34 +0,0 @@ -import os -import sys -import subprocess - -def main(): - - inputdata=sys.argv[1] - response=sys.argv[2] - tmp_dir=sys.argv[3] - nbfold=sys.argv[4] - loss=sys.argv[5] - outputlog=sys.argv[6] - output=sys.argv[7] - log=sys.argv[8] - - script_dir=os.path.dirname(os.path.abspath(__file__)) - - if (outputlog=="TRUE"): - errfile=open(log,'w') - else: - errfile=open(os.path.join(tmp_dir,"errfile.log"),'w') - - - retcode=subprocess.call(["Rscript", os.path.join(script_dir,"markersSelection.R"), inputdata, response, tmp_dir, nbfold, loss, output], stdout = errfile, stderr = errfile) - -# if (plot=="TRUE"): -# shutil.copy(os.path.join(tmp_dir,"mpagenomics","Rplots.pdf"), pdffigures) - - errfile.close() - - sys.exit(retcode) - -if __name__ == "__main__": - main() \ No newline at end of file
--- a/markersSelection.xml Thu Apr 09 15:23:57 2020 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,100 +0,0 @@ -<tool id="markersSelection" name="Markers selection" force_history_refresh="True" version="0.1.0"> - <requirement type="package" version="1.1.2">mpagenomics</requirement> - <command interpreter="python"> - markersSelection.py '$input' '$response' '$__new_file_path__' '$folds' '$loss' '$outputlog' '$output' '$log' - </command> - <inputs> - <param name="input" type="data" format="sef" label="Input Signal" help="see below for more information on file format"/> - <param name="response" type="data" format="csv" label="Data response" help="Data response csv file. See below for more information on file format" /> - <param name="folds" type="integer" min="1" value="10" label ="Number of folds for cross validation" help="Integer between 1 and number of file in the .cel file dataset"/> - <param name="loss" type="select" multiple="false" label="Response type"> - <option value="linear">Linear</option> - <option value="logistic">Logistic</option> - </param> - - <param name="outputgraph" type="select" label="Output figures"> - <option value="TRUE">Yes</option> - <option value="FALSE">No</option> - </param> - <param name="outputlog" type="select" label="Output log"> - <option value="TRUE">Yes</option> - <option value="FALSE">No</option> - </param> - </inputs> - <outputs> - <data format="txt" name="output" label="selection of ${input.name}" /> - <data format="log" name="log" label="log of selection of ${input.name}" > - <filter>outputlog == "TRUE"</filter> - </data> - </outputs> - <stdio> - <exit_code range="1:" level="fatal" description="See logs for more details" /> - </stdio> - <help> - **What it does** - -This tool selects some relevant markers according to a response using penalized regressions. - -Input: - -*A tabular text file containing 3 fixed columns and 1 column per sample:* - - - chr: Chromosome. - - position: Genomic position (in bp). - - probeNames: Names of the probes. - - One column per sample which contain the copy number signal for each sample. - -Output: - -*A tabular text file containing 5 columns which describe all the selected SNPs (1 line per SNP):* - - - chr: Chromosome containing the selected SNP. - - position: Position of the selected SNP. - - index: Index of the selected SNP. - - names: Name of the selected SNP. - - coefficient: Regression coefficient of the selected SNP. - ------ - -**Data Response csv file** - -Data response csv file format: - - - The first column contains the names of the different files of the dataset. - - - The second column is the response associated with each file. - - - Column names of these two columns are respectively files and response. - - - Columns are separated by a comma - - - *Extensions of the files (.CEL for example) should be removed* - - - -**Example** - -Let 3 .cel files in the studied dataset :: - - patient1.cel - patient2.cel - patient3.cel - -The csv file should look like this :: - - files,response - patient1,1.92145 - patient2,2.12481 - patient3,1.23545 - - ------ - -**Citation** - -If you use this tool please cite : - -`Q. Grimonprez, A. Celisse, M. Cheok, M. Figeac, and G. Marot. MPAgenomics : An R package for multi-patients analysis of genomic markers, 2014. Preprint <http://fr.arxiv.org/abs/1401.5035>`_ - - </help> -</tool>