# HG changeset patch
# User fubar
# Date 1370604865 14400
# Node ID 13f82e0a397c53d7c1486f03defd03d4f02d80c8
# Parent 63eaa64904be19250aa305a1bb1b56fc5b266859
Should now pass functional test. Painful business
diff -r 63eaa64904be -r 13f82e0a397c htseq_bams_to_count_matrix/generatetest.sh
--- a/htseq_bams_to_count_matrix/generatetest.sh Fri Jun 07 02:59:49 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-#python ../htseqsams2mx.py -g rn4_chr20_100k.gtf -o test.xls --samf "'rn4chr20test1.bam','col1'" --samf "'rn4chr20test2.bam','col2'"
-python ../htseqsams2mx.py -g rn4_chr20_100k.gtf -o htseqsams2mx_test1_out.xls --samf "'rn4chr20test1.bam',''" --samf "'rn4chr20test2.bam',''"
diff -r 63eaa64904be -r 13f82e0a397c htseq_bams_to_count_matrix/htseqsams2mx.py
--- a/htseq_bams_to_count_matrix/htseqsams2mx.py Fri Jun 07 02:59:49 2013 -0400
+++ b/htseq_bams_to_count_matrix/htseqsams2mx.py Fri Jun 07 07:34:25 2013 -0400
@@ -121,7 +121,7 @@
sys.stdout.write( "Warning: No features of type '%s' found.\n" % opts.feature_type )
for sami,sam_filename in enumerate(sam_filenames):
colname = colnames[sami]
- isbam = sam_filename.endswith('.bam')
+ isbam = colname.endswith('.bam')
try:
if isbam:
read_seq = HTSeq.BAM_Reader( sam_filename )
diff -r 63eaa64904be -r 13f82e0a397c htseq_bams_to_count_matrix/htseqsams2mx.xml
--- a/htseq_bams_to_count_matrix/htseqsams2mx.xml Fri Jun 07 02:59:49 2013 -0400
+++ b/htseq_bams_to_count_matrix/htseqsams2mx.xml Fri Jun 07 07:34:25 2013 -0400
@@ -4,12 +4,14 @@
+
htseqsams2mx.py -g "$gfffile" -o "$outfile" -m "$model" --id_attribute "$id_attr" --feature_type "$feature_type"
--samf "'$firstsamf','${firstsamf.name}'"
@@ -23,7 +25,7 @@
--samf "'$fourthsamf','${fourthsamf.name}'"
#end if
#for $s in $samfiles:
- #if $s.samf != None:
+ #if $s.samf:
--samf "'${s.samf}','${s.samf.name}'"
#end if
#end for
@@ -33,7 +35,7 @@
-
+
transcript
+ format="sam,bam" optional="false"/>
+ format="sam,bam" optional="false"/>
-
-
+
+
+
-
-
+
+
@@ -99,20 +102,21 @@
----
-**Tool author's plea on the importance of replicates**
+**Author's plea on replicates**
-If you want the downstream p values to inform you about your data in terms of rejecting or accepting the null hypothesis
-under random sampling from the universe of possible biological/experimental replicates from which your data was drawn,
+If you want to interpret the downstream p values in terms of rejecting or accepting the null hypothesis
+under random sampling with replacement from the universe of possible biological/experimental replicates from which your data was derived,
which is what published p values are often assumed to do, then you need biological
(or for cell culture material experimental) replicates.
Using technical or no replicates means the downstream p values are not interpretable the way most people would assume
they are - ie as the probability of obtaining a result as or more extreme as your experimental data
-in millions of experiments conducted under the null hypothesis.
+in millions of experiments conducted using the same methods under the null hypothesis.
There is no way around this and it is scientific fraud to ignore this issue and publish bogus p values derived from
technical or no replicates without making the lack of biological or experimental error in the p value calculations
-clear to your readers.
+clear to your readers so they can adjust their expectations. However, the buck stops here at higher level inference.
+If you have no replicates, you must not use this tool as the p values are uninterpretable. So there.
See your stats 101 notes on the central limit theorem and test statistics for a refresher or talk to a
statistician if this makes no sense please.
@@ -129,9 +133,13 @@
It will be automatically installed if you use the toolshed as in general, you probably should.
HTSeq_ must be installed with this tool if you install manually.
-Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is
+Otherwise, all code and documentation comprising this tool including the requirement
+for more than one sample bam
+was written by Ross Lazarus and is
licensed to you under the LGPL_ like other rgenetics artefacts
+Sorry, don't use so can't be buggered with read groups - contributions welcome - send code
+
.. _LGPL: http://www.gnu.org/copyleft/lesser.html
.. _HTSeq: http://www-huber.embl.de/users/anders/HTSeq/doc/index.html
diff -r 63eaa64904be -r 13f82e0a397c htseq_bams_to_count_matrix/test-data/generatetest.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/htseq_bams_to_count_matrix/test-data/generatetest.sh Fri Jun 07 07:34:25 2013 -0400
@@ -0,0 +1,2 @@
+#python ../htseqsams2mx.py -g rn4_chr20_100k.gtf -o test.xls --samf "'rn4chr20test1.bam','col1'" --samf "'rn4chr20test2.bam','col2'"
+python ../htseqsams2mx.py -g rn4_chr20_100k.gtf -o htseqsams2mx_test1_out.xls --samf "'rn4chr20test1.bam',''" --samf "'rn4chr20test2.bam',''"