changeset 6:efb356d09d6d draft default tip

Uploaded
author kellrott
date Mon, 24 Jun 2013 13:37:27 -0400
parents 83f2acca2387
children
files matrix_manipulate/floatMatrix.pyc matrix_manipulate/matrix_filter.py matrix_manipulate/matrix_filter.xml matrix_manipulate/quartile_norm.pl
diffstat 4 files changed, 114 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
Binary file matrix_manipulate/floatMatrix.pyc has changed
--- a/matrix_manipulate/matrix_filter.py	Thu Jun 13 16:54:40 2013 -0400
+++ b/matrix_manipulate/matrix_filter.py	Mon Jun 24 13:37:27 2013 -0400
@@ -9,16 +9,21 @@
 
 def value_eval(code, values, label, label_set):
     funcmap = {
-        "len":len,
         "values" : values,
         "label" : label,
         "label_set" : label_set,
+    }
+    builtins = {
         "math" : math,
+        "list" : list,
         "sum" : sum,
         "min" : min,
-        "max" : max
-     }
-    return eval(code,{"__builtins__":None},funcmap)
+        "max" : max,
+        "len" : len,
+        "True" : True,
+        "False" : False
+    }
+    return eval(code,{"__builtins__": builtins},funcmap)
          
 
 if __name__ == "__main__":
--- a/matrix_manipulate/matrix_filter.xml	Thu Jun 13 16:54:40 2013 -0400
+++ b/matrix_manipulate/matrix_filter.xml	Mon Jun 24 13:37:27 2013 -0400
@@ -13,23 +13,13 @@
 	<inputs>
 		<param name="row_txt" type="text" area="True" size="5x35" label="Row Eval Code" optional="True">
 			<sanitizer>
-				<valid initial="string.printable">
-					<remove value="&quot;"/>
-				</valid>
-				<mapping initial="none">
-					<add source="&quot;" target="\&quot;"/>
-					<add source="\" target="\\"/>
-				</mapping>
+				<valid initial="string.printable"/>
 			</sanitizer>
 		</param>
 		<param name="col_txt" type="text" area="True" size="5x35" label="Column Eval Code" optional="True">
 			<sanitizer>
-				<valid initial="string.printable">
+				<valid initial="string.printable"/>
 					<remove value="&quot;"/>
-				</valid>
-				<mapping initial="none">
-					<add source="&quot;" target="\&quot;"/>
-				</mapping>
 			</sanitizer>
 		</param>
 		<param name="matrix" type="data" format="tabular" label="Matrix"/>
@@ -45,7 +35,7 @@
 	</configfiles>
 	
 	<help>
-This is a utility to perform filtering operations on the rows and columns of a tabular file.
+This is a utility to perform filtering operations on the rows and columns of a numeric matrix file.
 
 - The 'Column Eval Code' operations occur on the first line.
 - The 'Row Eval Code' operations occur on the first cell of every line
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/matrix_manipulate/quartile_norm.pl	Mon Jun 24 13:37:27 2013 -0400
@@ -0,0 +1,102 @@
+#!/usr/bin/perl
+
+use strict;
+use Getopt::Long;
+
+# ---- Command-line options and their defaults --------------------------------
+my $out = '-';        # output path; '-' means write to STDOUT
+my $q = 75;           # percentile used as the per-column reference value
+my @col;              # 1-based columns to normalize (-1 = all but the first)
+my @also;             # 1-based columns copied to the output unchanged
+my $target = 1000;    # value the reference percentile is rescaled to
+my $skip = 0;         # number of leading lines passed through verbatim
+my $min = 1;          # values below this are ignored when finding the percentile
+GetOptions("quant=i"=>\$q, "target=i"=>\$target, "col=i@"=>\@col, "out=s"=>\$out, "also=i@"=>\@also, "skip=i"=>\$skip, "min=i"=>\$min);
+
+my $in = shift @ARGV;
+
+die usage() unless defined($in) && length($in) && @col;
+
+# STDOUT must be dup'ed for *writing* ('>&'). A '<&' dup yields a read-only
+# handle, so every later print to it fails and the output is lost.
+# Three-argument open keeps characters in user-supplied paths from being
+# interpreted as an open mode or a pipe.
+if ($out eq '-') {
+        open(OUT, '>&', \*STDOUT) || die "Can't open $out\n";
+} else {
+        open(OUT, '>', $out) || die "Can't open $out\n";
+}
+if ($in eq '-') {
+        open(IN, '<&', \*STDIN) || die "Can't open $in\n";
+} else {
+        open(IN, '<', $in) || die "Can't open $in\n";
+}
+
+# Keep the first column (usually row labels) in the output unless the caller
+# chose other pass-through columns or is normalizing column 1 itself.
+@also = (1) if !@also && !grep {$_ eq '1'} @col;
+
+# Convert the 1-based column numbers to 0-based indexes. The "-c -1"
+# (all but first) request therefore becomes the sentinel value -2.
+$_-- for @col;
+$_-- for @also;
+
+# ---- Read the table -----------------------------------------------------------
+my @d;                # $d[column][row]: only the selected columns are stored
+my $cnt = 0;          # number of data rows read
+my $head = '';        # skipped header lines, echoed verbatim before the data
+while (my $line = <IN>) {
+        if ($skip) {
+                --$skip;
+                $head .= $line;
+                next;
+        }
+        chomp $line;
+        my @f = split /\t/, $line;
+        # Expand the "all but first" sentinel once the row width is known.
+        if (@col && $col[0] eq '-2') {
+                @col = (1..$#f);
+        }
+        for my $c (@col, @also) {
+                push @{$d[$c]}, $f[$c];
+        }
+        ++$cnt;
+}
+close(IN);
+
+# ---- Normalize each requested column -----------------------------------------
+# Skipped entirely when no data rows were read: the header is still echoed.
+if ($cnt) {
+        for my $c (@col) {
+                my $vals = $d[$c];
+                my @t = sort {$a <=> $b} grep {$_ >= $min} @{$vals};
+                my $ref = quantile(\@t, $q/100);
+                # Fail with a clear message rather than "Illegal division by zero".
+                die "Can't normalize column " . ($c+1) . ": quantile is zero or undefined (no values >= $min?)\n" unless $ref;
+                for my $v (@{$vals}) {
+                        $v = sprintf "%.4f", $target*$v/$ref;
+                }
+        }
+}
+
+# ---- Write the result -----------------------------------------------------------
+# Columns are emitted in their original left-to-right order.
+my @out = (sort {$a <=> $b} (@col, @also));
+
+print OUT $head;
+
+for my $i (0 .. $cnt-1) {
+        print OUT join("\t", map { $d[$_][$i] } @out), "\n";
+}
+
+# Buffered write errors (e.g. a full disk) only surface at close.
+close(OUT) || die "Can't write $out\n";
+
+
+# Build the help text shown when the input file or -c is missing.
+# Takes no arguments; returns the message as a single string ($0 is
+# interpolated as the program name). The caller decides what to do with it.
+sub usage {
+        return <<EOF;
+Usage: $0 -c COL [opts] FILE
+
+Returns an upper quartile normalization of data in column(s) COL
+of file FILE.
+
+Col is 1-based; values below the minimum (-m) are ignored when calculating the upper quartile
+
+Options:
+   -c|col COL    normalize this column of data (can specify more than once, or -1 for all but first col)
+   -q|quant INT  quantile to use (75)
+   -t|target INT target to use (1000)
+   -a|also COL   output these columns also
+   -o|out FILE   output to this file instead of stdout
+   -m|min INT    minimum value (1)
+   -s|skip INT   skip header rows
+EOF
+}
+
+# Return the $p-quantile of a numerically sorted list, using linear
+# interpolation between the two nearest order statistics.
+#
+#   $sorted  array ref of numbers, already sorted ascending
+#   $p       quantile as a fraction, expected to be in 0..1 (0.75 = upper quartile)
+#
+# Returns undef for an empty list.
+sub quantile {
+        my ($sorted, $p) = @_;
+        my $n = scalar(@{$sorted});
+        return unless $n;
+        my $rank = ($n-1)*$p;           # fractional 0-based position
+        my $lo   = int($rank);
+        my $frac = $rank - $lo;
+        my $v    = $sorted->[$lo];
+        return $v unless $frac > 0;
+        # Weight the step to the next element by the fractional part of the
+        # rank. Using $p itself here is only correct by coincidence.
+        return $v + $frac * ($sorted->[$lo+1] - $v);
+}