changeset 7:f2d24110f65a draft

planemo upload for repository https://github.com/sblanck/MPAgenomics4Galaxy/tree/master/mpagenomics_wrappers commit 2bcab460cab11ab529468aa0c10354f61accc08a
author sblanck
date Fri, 10 Apr 2020 13:32:59 +0000
parents 7afc2ee6cc89
children d5ed62b4d3ac
files filter.xml markersSelection.R markersSelection.py markersSelection.xml
diffstat 4 files changed, 1 insertions(+), 170 deletions(-) [+]
line wrap: on
line diff
--- a/filter.xml	Thu Apr 09 15:23:57 2020 +0000
+++ b/filter.xml	Fri Apr 10 13:32:59 2020 +0000
@@ -32,7 +32,7 @@
   </inputs>
   
   <outputs>
-    <data format="txt" name="output" label="filter of ${on_string}" />
+    <data format="tabular" name="output" label="filter of ${on_string}" />
     
 	<data format="log" name="log" label="log of segmentation of ${input.name}">
     	<filter>outputlog == "TRUE"</filter>
--- a/markersSelection.R	Thu Apr 09 15:23:57 2020 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,35 +0,0 @@
-args<-commandArgs(TRUE)
-
-input=args[1]
-response=args[2]
-tmp_dir=args[3]
-nbFolds=as.numeric(args[4])
-loss=args[5]
-output=args[6]
-
-library(MPAgenomics)
-workdir=file.path(tmp_dir, "mpagenomics")
-setwd(workdir)
-
-CN=read.table(input,header=TRUE,check.names=FALSE)
-drops=c("chromosome","position","probeName")
-CNsignal=CN[,!(names(CN)%in% drops)]
-samples=names(CNsignal)
-CNsignalMatrix=t(data.matrix(CNsignal))
-resp=read.table(response,header=TRUE,sep=",")
-listOfFile=resp[[1]]
-responseValue=resp[[2]]
-index = match(listOfFile,rownames(CNsignalMatrix))
-responseValueOrder=responseValue[index]
-
-result=variableSelection(CNsignalMatrix,responseValueOrder,nbFolds=nbFolds,loss=loss,plot=TRUE)
-
-CNsignalResult=CN[result$markers.index,(names(CN)%in% drops)]
-
-CNsignalResult["coefficient"]=result$coefficient
-CNsignalResult["index"]=result$markers.index
-
-sink(output)
-print(format(CNsignalResult),row.names=FALSE)
-sink()
-#write.table(CNsignalResult,output,row.names = FALSE, quote=FALSE, sep = "\t")
--- a/markersSelection.py	Thu Apr 09 15:23:57 2020 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,34 +0,0 @@
-import os
-import sys
-import subprocess
-
-def main():
-
-    inputdata=sys.argv[1]
-    response=sys.argv[2]
-    tmp_dir=sys.argv[3]
-    nbfold=sys.argv[4]
-    loss=sys.argv[5]
-    outputlog=sys.argv[6] 
-    output=sys.argv[7]
-    log=sys.argv[8]
-    
-    script_dir=os.path.dirname(os.path.abspath(__file__))
-        
-    if (outputlog=="TRUE"):
-        errfile=open(log,'w')
-    else:
-        errfile=open(os.path.join(tmp_dir,"errfile.log"),'w')
-    
- 
-    retcode=subprocess.call(["Rscript", os.path.join(script_dir,"markersSelection.R"), inputdata, response, tmp_dir, nbfold, loss, output], stdout = errfile, stderr = errfile)
-    
-#  if (plot=="TRUE"):
-#      shutil.copy(os.path.join(tmp_dir,"mpagenomics","Rplots.pdf"), pdffigures)
-    
-    errfile.close()
-       
-    sys.exit(retcode)
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file
--- a/markersSelection.xml	Thu Apr 09 15:23:57 2020 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,100 +0,0 @@
-<tool id="markersSelection" name="Markers selection" force_history_refresh="True" version="0.1.0">
-  <requirement type="package" version="1.1.2">mpagenomics</requirement>
-  <command interpreter="python">
-    markersSelection.py '$input' '$response' '$__new_file_path__' '$folds' '$loss' '$outputlog' '$output' '$log'
-  </command>
-  <inputs>
-    <param name="input" type="data" format="sef" label="Input Signal" help="see below for more information on file format"/>
-	<param name="response" type="data" format="csv" label="Data response" help="Data response csv file. See below for more information on file format" />    
-	<param name="folds" type="integer" min="1" value="10" label ="Number of folds for cross validation" help="Integer between 1 and number of file in the .cel file dataset"/>
-    <param name="loss" type="select" multiple="false" label="Response type">
-      	<option value="linear">Linear</option>
-    	<option value="logistic">Logistic</option>
-    </param>
-   
-	<param name="outputgraph" type="select" label="Output figures">
-        <option value="TRUE">Yes</option>
-        <option value="FALSE">No</option>
-     </param>    
-    <param name="outputlog" type="select" label="Output log">
-        <option value="TRUE">Yes</option>
-        <option value="FALSE">No</option>
-    </param>    
-  </inputs>       
-  <outputs>
-    <data format="txt" name="output" label="selection of ${input.name}" />
-    <data format="log" name="log" label="log of selection of ${input.name}" >
-    	<filter>outputlog == "TRUE"</filter>
-    </data>
-  </outputs>
-  <stdio>
-    <exit_code range="1:"   level="fatal"   description="See logs for more details" />
-   </stdio>
-  <help>
-  **What it does**
-   	    	
-This tool selects some relevant markers according to a response using penalized regressions.
-  
-Input:
-
-*A tabular text file containing 3 fixed columns and 1 column per sample:*
-	
-	- chr: Chromosome.
-	- position: Genomic position (in bp).
-  	- probeNames: Names of the probes.
-  	- One column per sample which contain the copy number signal for each sample.
-
-Output:
-  	
-*A tabular text file containing 5 columns which describe all the selected SNPs (1 line per SNP):*
-	
-	- chr: Chromosome containing the selected SNP.
-  	- position: Position of the selected SNP.
-	- index: Index of the selected SNP.
-	- names: Name of the selected SNP.
-	- coefficient: Regression coefficient of the selected SNP.
-
------
-
-**Data Response csv file**
-     	
-Data response csv file format:
-	
-	- The first column contains the names of the different files of the dataset.
-     	 
-	- The second column is the response associated with each file. 
-     	
-	- Column names of these two columns are respectively files and response.
-
-	- Columns are separated by a comma
-     	
-	- *Extensions of the files (.CEL for example) should be removed*
-
-
-     	
-**Example** 
-
-Let 3 .cel files in the studied dataset ::
-     	
-     	patient1.cel
-     	patient2.cel
-     	patient3.cel 
-     	
-The csv file should look like this ::
-     	
-     	files,response
-     	patient1,1.92145
-     	patient2,2.12481
-     	patient3,1.23545
-
-
------
-  	   	
-**Citation**
-	
-If you use this tool please cite : 
-
-`Q. Grimonprez, A. Celisse, M. Cheok, M. Figeac, and G. Marot. MPAgenomics : An R package for multi-patients analysis of genomic markers, 2014. Preprint &lt;http://fr.arxiv.org/abs/1401.5035&gt;`_
-  
- </help>
-</tool>