Galaxy | (sandbox for testing) |

Changeset 5:06e51c47590d (2013-06-12)

Previous changeset 4:dea65c85afb4 (2013-06-12) Next changeset 6:a6e497d7baa2 (2013-06-12)

Commit message:
Uploaded

modified:
rgedgeR/rgedgeRpaired.xml

added:
rgedgeR/rgedgeRpaired.xml~

diff -r dea65c85afb4 -r 06e51c47590d rgedgeR/rgedgeRpaired.xml
--- a/rgedgeR/rgedgeRpaired.xml Wed Jun 12 03:45:24 2013 -0400
+++ b/rgedgeR/rgedgeRpaired.xml Wed Jun 12 03:51:48 2013 -0400

@@ -1,9 +1,9 @@
<tool id="rgedgeRpaired" name="edgeR" version="0.18">
   <description>1 or 2 level models for count data</description>
   <requirements>
-      <requirement type="package" version="6.2">">name=package_readline_6_2</requirement>
-      <requirement type="package" version="3.0.1">">name=package_R</requirement>
-      <requirement type="package" version="2.12">">name=package_BioCBasics</requirement>
+      <requirement type="package" version="6.2">name=package_readline_6_2</requirement>
+      <requirement type="package" version="3.0.1">name=package_R</requirement>
+      <requirement type="package" version="2.12">name=package_BioCBasics</requirement>
   </requirements>

   <command interpreter="python">

diff -r dea65c85afb4 -r 06e51c47590d rgedgeR/rgedgeRpaired.xml~
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rgedgeR/rgedgeRpaired.xml~ Wed Jun 12 03:51:48 2013 -0400

b'@@ -0,0 +1,633 @@\n+<tool id="rgedgeRpaired" name="edgeR" version="0.18">\n+ <description>1 or 2 level models for count data</description>\n+ <requirements>\n+ <requirement type="package" version="6.2">">name=package_readline_6_2</requirement>\n+ <requirement type="package" version="3.0.1">">name=package_R</requirement>\n+ <requirement type="package" version="2.12">">name=package_BioCBasics</requirement>\n+ </requirements>\n+ \n+ <command interpreter="python">\n+ rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "edgeR" \n+ --output_dir "$html_file.files_path" --output_html "$html_file" --output_tab "$outtab" --make_HTML "yes"\n+ </command>\n+ <inputs>\n+ <param name="input1" type="data" format="tabular" label="Select an input matrix - rows are contigs, columns are counts for each sample"\n+ help="Use the HTSeq based count matrix preparation tool to create these matrices from BAM/SAM files and a GTF file of genomic features"/>\n+ <param name="title" type="text" value="edgeR" size="80" label="Title for job outputs" help="Supply a meaningful name here to remind you what the outputs contain">\n+ <sanitizer invalid_char="">\n+ <valid initial="string.letters,string.digits"><add value="_" /> </valid>\n+ </sanitizer>\n+ </param>\n+ <param name="treatment_name" type="text" value="Treatment" size="50" label="Treatment Name"/>\n+ <param name="Treat_cols" label="Select columns containing treatment." type="data_column" data_ref="input1" numerical="True" \n+ multiple="true" use_header_names="true" size="120" display="checkboxes">\n+ <validator type="no_options" message="Please select at least one column."/>\n+ </param>\n+ <param name="control_name" type="text" value="Control" size="50" label="Control Name"/>\n+ <param name="Control_cols" label="Select columns containing control." 
type="data_column" data_ref="input1" numerical="True" \n+ multiple="true" use_header_names="true" size="120" display="checkboxes" optional="true">\n+ </param>\n+ <param name="subjectids" type="text" optional="true" size="120"\n+ label="IF SUBJECTS NOT ALL INDEPENDENT! Enter integers to indicate sample pairing for every column in input"\n+ help="Leave blank if no pairing, but eg if data from sample id A99 is in columns 2,4 and id C21 is in 3,5 then enter \'1,2,1,2\'">\n+ <sanitizer>\n+ <valid initial="string.digits"><add value="," /> </valid>\n+ </sanitizer>\n+ </param>\n+ <param name="fQ" type="float" value="0.3" size="5" label="Non-differential contig count quantile threshold - zero to analyze all non-zero read count contigs"\n+ help="May be a good or a bad idea depending on the biology and the question. EG 0.3 = sparsest 30% of contigs with at least one read are removed before analysis"/>\n+ <param name="useNDF" type="boolean" truevalue="T" checked=\'false\' falsevalue="" size="1" label="Non differential filter - remove contigs below a threshold (1 per million) for half or more samples"\n+ help="May be a good or a bad idea depending on the biology and the question. This was the old default. Quantile based is available as an alternative"/>\n+ <param name="priordf" type="integer" value="20" size="3" label="prior.df for tagwise dispersion - lower value = more emphasis on each tag\'s variance. Replaces prior.n and prior.df = prior.n * residual.df"\n+ help="Zero = Use edgeR default. Use a small value to \'smooth\' small samples. 
See edgeR docs and note below"/>\n+ <param name="fdrthresh" type="float" value="0.05" size="5" label="P value threshold for FDR filtering for family wise error rate control"\n+ help="Conventional default value of 0.05 recommended"/>\n+ <param name="fdrtype" type="select" label="FDR (Type I error) control method" \n+ help="Use fdr or bh typically to control for the number of tests in a reliable way">\n+ <option value="fdr" selected="true">fdr</option>\n+ <option value="BH">B'..b'ethods.\n+\n+If you have (eg) paired samples and wish to include a term in the GLM to account for some other factor (subject in the case of paired samples),\n+put a comma separated list of indicators for every sample (whether modelled or not!) indicating (eg) the subject number or \n+A list of integers, one for each subject or an empty string if samples are all independent.\n+If not empty, there must be exactly as many integers in the supplied integer list as there are columns (samples) in the count matrix.\n+Integers for samples that are not in the analysis *must* be present in the string as filler even if not used.\n+\n+So if you have 2 pairs out of 6 samples, you need to put in unique integers for the unpaired ones\n+eg if you had 6 samples with the first two independent but the second and third pairs each being from independent subjects. you might use\n+8,9,1,1,2,2 \n+as subject IDs to indicate two paired samples from the same subject in columns 3/4 and 5/6\n+\n+**Output**\n+\n+A matrix which consists of the original data and relative expression levels and some helpful plots\n+\n+**Note on edgeR versions**\n+\n+The edgeR authors made a small cosmetic change in the name of one important variable (from p.value to PValue) \n+breaking this and all other code that assumed the old name for this variable, \n+between edgeR2.4.4 and 2.4.6 (the version for R 2.14 as at the time of writing). \n+This means that all code using edgeR is sensitive to the version. 
I think this was a very unwise thing \n+to do because it wasted hours of my time to track down and will similarly cost other edgeR users dearly\n+when their old scripts break. This tool currently now works with 2.4.6.\n+\n+**Note on prior.N**\n+\n+http://seqanswers.com/forums/showthread.php?t=5591 says:\n+\n+*prior.n*\n+\n+The value for prior.n determines the amount of smoothing of tagwise dispersions towards the common dispersion. \n+You can think of it as like a "weight" for the common value. (It is actually the weight for the common likelihood \n+in the weighted likelihood equation). The larger the value for prior.n, the more smoothing, i.e. the closer your \n+tagwise dispersion estimates will be to the common dispersion. If you use a prior.n of 1, then that gives the \n+common likelihood the weight of one observation.\n+\n+In answer to your question, it is a good thing to squeeze the tagwise dispersions towards a common value, \n+or else you will be using very unreliable estimates of the dispersion. I would not recommend using the value that \n+you obtained from estimateSmoothing()---this is far too small and would result in virtually no moderation \n+(squeezing) of the tagwise dispersions. How many samples do you have in your experiment? \n+What is the experimental design? If you have few samples (less than 6) then I would suggest a prior.n of at least 10. \n+If you have more samples, then the tagwise dispersion estimates will be more reliable, \n+so you could consider using a smaller prior.n, although I would hesitate to use a prior.n less than 5. \n+\n+\n+From Bioconductor Digest, Vol 118, Issue 5, Gordon writes:\n+\n+Dear Dorota,\n+\n+The important settings are prior.df and trend.\n+\n+prior.n and prior.df are related through prior.df = prior.n * residual.df,\n+and your experiment has residual.df = 36 - 12 = 24. So the old setting of\n+prior.n=10 is equivalent for your data to prior.df = 240, a very large\n+value. 
Going the other way, the new setting of prior.df=10 is equivalent\n+to prior.n=10/24.\n+\n+To recover old results with the current software you would use\n+\n+ estimateTagwiseDisp(object, prior.df=240, trend="none")\n+\n+To get the new default from old software you would use\n+\n+ estimateTagwiseDisp(object, prior.n=10/24, trend=TRUE)\n+\n+Actually the old trend method is equivalent to trend="loess" in the new\n+software. You should use plotBCV(object) to see whether a trend is\n+required.\n+\n+Note you could also use\n+\n+ prior.n = getPriorN(object, prior.df=10)\n+\n+to map between prior.df and prior.n.\n+\n+</help>\n+\n+</tool>\n+\n+\n'