Galaxy | (sandbox for testing) |

Changeset 13:b2ec6ec6ef74 (2013-06-12)

Previous changeset 12:65a97686ca5d (2013-06-12) Next changeset 14:a4d7ec124c53 (2013-06-12)

Commit message:
Uploaded

modified:
rgedgeR/rgedgeRpaired.xml
rgedgeR/tool_dependencies.xml

removed:
rgedgeR/rgedgeRpaired.xml~
rgedgeR/test-data/gentestdata.sh~
rgedgeR/tool_dependencies.xml~

diff -r 65a97686ca5d -r b2ec6ec6ef74 rgedgeR/rgedgeRpaired.xml
--- a/rgedgeR/rgedgeRpaired.xml Wed Jun 12 05:21:25 2013 -0400
+++ b/rgedgeR/rgedgeRpaired.xml Wed Jun 12 06:13:41 2013 -0400

@@ -1,7 +1,6 @@
<tool id="rgedgeRpaired" name="edgeR" version="0.18">
   <description>1 or 2 level models for count data</description>
   <requirements>
-      <requirement type="package" version="6.2">readline</requirement>
       <requirement type="package" version="3.0.1">package_R</requirement>
       <requirement type="package" version="2.12">package_BioCBasics</requirement>
   </requirements>

diff -r 65a97686ca5d -r b2ec6ec6ef74 rgedgeR/rgedgeRpaired.xml~
--- a/rgedgeR/rgedgeRpaired.xml~ Wed Jun 12 05:21:25 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

b'@@ -1,633 +0,0 @@\n-<tool id="rgedgeRpaired" name="edgeR" version="0.18">\n- <description>1 or 2 level models for count data</description>\n- <requirements>\n- <requirement type="package" version="6.2">name=readline</requirement>\n- <requirement type="package" version="3.0.1">name=package_R</requirement>\n- <requirement type="package" version="2.12">name=package_BioCBasics</requirement>\n- </requirements>\n- \n- <command interpreter="python">\n- rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "edgeR" \n- --output_dir "$html_file.files_path" --output_html "$html_file" --output_tab "$outtab" --make_HTML "yes"\n- </command>\n- <inputs>\n- <param name="input1" type="data" format="tabular" label="Select an input matrix - rows are contigs, columns are counts for each sample"\n- help="Use the HTSeq based count matrix preparation tool to create these matrices from BAM/SAM files and a GTF file of genomic features"/>\n- <param name="title" type="text" value="edgeR" size="80" label="Title for job outputs" help="Supply a meaningful name here to remind you what the outputs contain">\n- <sanitizer invalid_char="">\n- <valid initial="string.letters,string.digits"><add value="_" /> </valid>\n- </sanitizer>\n- </param>\n- <param name="treatment_name" type="text" value="Treatment" size="50" label="Treatment Name"/>\n- <param name="Treat_cols" label="Select columns containing treatment." type="data_column" data_ref="input1" numerical="True" \n- multiple="true" use_header_names="true" size="120" display="checkboxes">\n- <validator type="no_options" message="Please select at least one column."/>\n- </param>\n- <param name="control_name" type="text" value="Control" size="50" label="Control Name"/>\n- <param name="Control_cols" label="Select columns containing control." type="data_column" data_ref="input1" numerical="True" \n- multiple="true" use_header_names="true" size="120" display="checkboxes" optional="true">\n- </param>\n- <param name="subjectids" type="text" optional="true" size="120"\n- label="IF SUBJECTS NOT ALL INDEPENDENT! Enter integers to indicate sample pairing for every column in input"\n- help="Leave blank if no pairing, but eg if data from sample id A99 is in columns 2,4 and id C21 is in 3,5 then enter \'1,2,1,2\'">\n- <sanitizer>\n- <valid initial="string.digits"><add value="," /> </valid>\n- </sanitizer>\n- </param>\n- <param name="fQ" type="float" value="0.3" size="5" label="Non-differential contig count quantile threshold - zero to analyze all non-zero read count contigs"\n- help="May be a good or a bad idea depending on the biology and the question. EG 0.3 = sparsest 30% of contigs with at least one read are removed before analysis"/>\n- <param name="useNDF" type="boolean" truevalue="T" checked=\'false\' falsevalue="" size="1" label="Non differential filter - remove contigs below a threshold (1 per million) for half or more samples"\n- help="May be a good or a bad idea depending on the biology and the question. This was the old default. Quantile based is available as an alternative"/>\n- <param name="priordf" type="integer" value="20" size="3" label="prior.df for tagwise dispersion - lower value = more emphasis on each tag\'s variance. Replaces prior.n and prior.df = prior.n * residual.df"\n- help="Zero = Use edgeR default. Use a small value to \'smooth\' small samples. See edgeR docs and note below"/>\n- <param name="fdrthresh" type="float" value="0.05" size="5" label="P value threshold for FDR filtering for amily wise error rate control"\n- help="Conventional default value of 0.05 recommended"/>\n- <param name="fdrtype" type="select" label="FDR (Type II error) control method" \n- help="Use fdr or bh typically to control for the number of tests in a reliable way">\n- <option value="fdr" selected="true">fdr</option>\n- <option value="BH">Benjamini Hochberg<'..b'ethods.\n-\n-If you have (eg) paired samples and wish to include a term in the GLM to account for some other factor (subject in the case of paired samples),\n-put a comma separated list of indicators for every sample (whether modelled or not!) indicating (eg) the subject number or \n-A list of integers, one for each subject or an empty string if samples are all independent.\n-If not empty, there must be exactly as many integers in the supplied integer list as there are columns (samples) in the count matrix.\n-Integers for samples that are not in the analysis *must* be present in the string as filler even if not used.\n-\n-So if you have 2 pairs out of 6 samples, you need to put in unique integers for the unpaired ones\n-eg if you had 6 samples with the first two independent but the second and third pairs each being from independent subjects. you might use\n-8,9,1,1,2,2 \n-as subject IDs to indicate two paired samples from the same subject in columns 3/4 and 5/6\n-\n-**Output**\n-\n-A matrix which consists the original data and relative expression levels and some helpful plots\n-\n-**Note on edgeR versions**\n-\n-The edgeR authors made a small cosmetic change in the name of one important variable (from p.value to PValue) \n-breaking this and all other code that assumed the old name for this variable, \n-between edgeR2.4.4 and 2.4.6 (the version for R 2.14 as at the time of writing). \n-This means that all code using edgeR is sensitive to the version. I think this was a very unwise thing \n-to do because it wasted hours of my time to track down and will similarly cost other edgeR users dearly\n-when their old scripts break. This tool currently now works with 2.4.6.\n-\n-**Note on prior.N**\n-\n-http://seqanswers.com/forums/showthread.php?t=5591 says:\n-\n-*prior.n*\n-\n-The value for prior.n determines the amount of smoothing of tagwise dispersions towards the common dispersion. \n-You can think of it as like a "weight" for the common value. (It is actually the weight for the common likelihood \n-in the weighted likelihood equation). The larger the value for prior.n, the more smoothing, i.e. the closer your \n-tagwise dispersion estimates will be to the common dispersion. If you use a prior.n of 1, then that gives the \n-common likelihood the weight of one observation.\n-\n-In answer to your question, it is a good thing to squeeze the tagwise dispersions towards a common value, \n-or else you will be using very unreliable estimates of the dispersion. I would not recommend using the value that \n-you obtained from estimateSmoothing()---this is far too small and would result in virtually no moderation \n-(squeezing) of the tagwise dispersions. How many samples do you have in your experiment? \n-What is the experimental design? If you have few samples (less than 6) then I would suggest a prior.n of at least 10. \n-If you have more samples, then the tagwise dispersion estimates will be more reliable, \n-so you could consider using a smaller prior.n, although I would hesitate to use a prior.n less than 5. \n-\n-\n-From Bioconductor Digest, Vol 118, Issue 5, Gordon writes:\n-\n-Dear Dorota,\n-\n-The important settings are prior.df and trend.\n-\n-prior.n and prior.df are related through prior.df = prior.n * residual.df,\n-and your experiment has residual.df = 36 - 12 = 24. So the old setting of\n-prior.n=10 is equivalent for your data to prior.df = 240, a very large\n-value. Going the other way, the new setting of prior.df=10 is equivalent\n-to prior.n=10/24.\n-\n-To recover old results with the current software you would use\n-\n- estimateTagwiseDisp(object, prior.df=240, trend="none")\n-\n-To get the new default from old software you would use\n-\n- estimateTagwiseDisp(object, prior.n=10/24, trend=TRUE)\n-\n-Actually the old trend method is equivalent to trend="loess" in the new\n-software. You should use plotBCV(object) to see whether a trend is\n-required.\n-\n-Note you could also use\n-\n- prior.n = getPriorN(object, prior.df=10)\n-\n-to map between prior.df and prior.n.\n-\n-</help>\n-\n-</tool>\n-\n-\n'

diff -r 65a97686ca5d -r b2ec6ec6ef74 rgedgeR/test-data/gentestdata.sh~
--- a/rgedgeR/test-data/gentestdata.sh~ Wed Jun 12 05:21:25 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

@@ -1,8 +0,0 @@
-#!/bin/bash
-# generate test data for rgGSEA
-# ross lazarus June 2013
-# adjust gseajar_path !
-GSEAJAR_PATH=/home/rlazarus/galaxy-central/tool_dependency_dir/gsea_jar/2.0.12/fubar/rg_gsea_test/8e291f464aa0/jars/gsea2-2.0.12.jar
-python ../rgGSEA.py --input_tab "gsea_test_DGE.xls" --adjpvalcol "5" --signcol "2" --idcol "1" --outhtml "gseatestout.html" --input_name "gsea_test" --setMax "500" --setMin "15" --nPerm "10" --plotTop "20" --gsea_jar "$GSEAJAR_PATH" --output_dir "gseatestout" --mode "Max_probe" --title "GSEA test" --builtin_gmt "gseatestdata.gmt"
-
-

diff -r 65a97686ca5d -r b2ec6ec6ef74 rgedgeR/tool_dependencies.xml
--- a/rgedgeR/tool_dependencies.xml Wed Jun 12 05:21:25 2013 -0400
+++ b/rgedgeR/tool_dependencies.xml Wed Jun 12 06:13:41 2013 -0400

@@ -1,37 +1,10 @@
<?xml version="1.0"?>
<tool_dependency>
-    <package name="readline" version="6.2">
-        <repository changeset_revision="1301ec7705a8" name="package_readline_6_2" owner="boris" prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu/" />
-    </package>
-    <package name="package_R" version="3.0.1">
-        <install version="1.0">
-            <actions>
-                <action type="download_by_url">http://cran.ms.unimelb.edu.au/src/base/R-3/R-3.0.1.tar.gz</action>
-                <action type="set_environment_for_install">
-                    <repository changeset_revision="1301ec7705a8" name="package_readline_6_2" owner="boris" toolshed="http://testtoolshed.g2.bx.psu.edu/">
-                        <package name="package_readline_6_2" version="6.2" />
-                    </repository>
-                </action>
-                <action type="make_directory">$INSTALL_DIR</action>
-                <action type="shell_command">./configure --with-blas --with-lapack --enable-R-shlib  --with-x=no --prefix=$INSTALL_DIR && make && make install</action>
-                <action type="set_environment">
-                    <environment_variable action="set_to" name="R_HOME">$INSTALL_DIR/lib/R</environment_variable>
-                    <environment_variable action="set_to" name="R_LIBS">$INSTALL_DIR/lib/R/library</environment_variable>
-                    <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/lib/R/bin</environment_variable>
-                </action>
-            </actions>
-        </install>
-        <readme>R is a free software environment for statistical computing and graphics
-                WARNING: See custom compilation options above
-                Modified from an older version of R by Boris by Ross Lazarus for R 3.0
-                Added Bioc basics too
-       </readme>
-    </package>
     <package name="package_BioCBasics" version="2.12">
         <install version="1.0">
             <actions>
                 <action type="set_environment_for_install">
-                    <package name="package_R" prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu" version="3.0.1" />
+                    <repository name="package_R" owner="fubar" prior_installation_required="True"  toolshed="http://testtoolshed.g2.bx.psu.edu/"/>
                 </action>
                 <action type="shell_command">$R_HOME/bin/R CMD BATCH installBioC.R </action>
             </actions>

diff -r 65a97686ca5d -r b2ec6ec6ef74 rgedgeR/tool_dependencies.xml~
--- a/rgedgeR/tool_dependencies.xml~ Wed Jun 12 05:21:25 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

@@ -1,44 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="package_readline_6_2" version="6.2">
-        <repository name="package_readline_6_2" owner="boris" prior_installation_required="True"
-                toolshed="http://testtoolshed.g2.bx.psu.edu/" />
-    </package>
-    <package name="package_R" version="3.0.1">
-        <install version="1.0">
-            <actions>
-                <action type="download_by_url">http://cran.ms.unimelb.edu.au/src/base/R-3/R-3.0.1.tar.gz</action>
-                <action type="set_environment_for_install">
-                    <repository changeset_revision="1301ec7705a8" name="package_readline_6_2" owner="boris"
-                            toolshed="http://testtoolshed.g2.bx.psu.edu/">
-                        <package name="package_readline_6_2" version="6.2" />
-                    </repository>
-                </action>
-                <action type="make_directory">$INSTALL_DIR</action>
-                <action type="shell_command">./configure --with-blas --with-lapack --enable-R-shlib --with-readline=no --with-x=no --prefix=$INSTALL_DIR && make && make install</action>
-                <action type="set_environment">
-                    <environment_variable action="set_to" name="R_HOME">$INSTALL_DIR/lib/R</environment_variable>
-                    <environment_variable action="set_to" name="R_LIBS">$INSTALL_DIR/lib/R/library</environment_variable>
-                    <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/lib/R/bin</environment_variable>
-                </action>
-            </actions>
-        </install>
-        <readme>R is a free software environment for statistical computing and graphics
-                WARNING: See custom compilation options above
-                Modified from an older version of R by Boris by Ross Lazarus for R 3.0
-                Added Bioc basics too
-       </readme>
-    </package>
-    <package name="package_BioCBasics" version="2.12">
-        <install version="1.0">
-            <actions>
-                <action type="shell_command">$INSTALL_DIR/lib/R/bin/R CMD BATCH installBioC.R </action>
-            </actions>
-        </install>
-        <readme>R is a free software environment for statistical computing and graphics
-                WARNING: See custom compilation options above
-                Modified from an older version of R by Boris by Ross Lazarus for R 3.0
-                Added Bioc basics via this package installBioC.R script
-       </readme>
-    </package>
-</tool_dependency>