Mercurial > repos > kellrott > matrix_manipulate
changeset 6:efb356d09d6d draft default tip
Uploaded
author | kellrott |
---|---|
date | Mon, 24 Jun 2013 13:37:27 -0400 |
parents | 83f2acca2387 |
children | |
files | matrix_manipulate/floatMatrix.pyc matrix_manipulate/matrix_filter.py matrix_manipulate/matrix_filter.xml matrix_manipulate/quartile_norm.pl |
diffstat | 4 files changed, 114 insertions(+), 17 deletions(-) [+] |
line wrap: on
line diff
--- a/matrix_manipulate/matrix_filter.py Thu Jun 13 16:54:40 2013 -0400 +++ b/matrix_manipulate/matrix_filter.py Mon Jun 24 13:37:27 2013 -0400 @@ -9,16 +9,21 @@ def value_eval(code, values, label, label_set): funcmap = { - "len":len, "values" : values, "label" : label, "label_set" : label_set, + } + builtins = { "math" : math, + "list" : list, "sum" : sum, "min" : min, - "max" : max - } - return eval(code,{"__builtins__":None},funcmap) + "max" : max, + "len" : len, + "True" : True, + "False" : False + } + return eval(code,{"__builtins__": builtins},funcmap) if __name__ == "__main__":
--- a/matrix_manipulate/matrix_filter.xml Thu Jun 13 16:54:40 2013 -0400 +++ b/matrix_manipulate/matrix_filter.xml Mon Jun 24 13:37:27 2013 -0400 @@ -13,23 +13,13 @@ <inputs> <param name="row_txt" type="text" area="True" size="5x35" label="Row Eval Code" optional="True"> <sanitizer> - <valid initial="string.printable"> - <remove value="""/> - </valid> - <mapping initial="none"> - <add source=""" target="\""/> - <add source="\" target="\\"/> - </mapping> + <valid initial="string.printable"/> </sanitizer> </param> <param name="col_txt" type="text" area="True" size="5x35" label="Column Eval Code" optional="True"> <sanitizer> - <valid initial="string.printable"> + <valid initial="string.printable"/> <remove value="""/> - </valid> - <mapping initial="none"> - <add source=""" target="\""/> - </mapping> </sanitizer> </param> <param name="matrix" type="data" format="tabular" label="Matrix"/> @@ -45,7 +35,7 @@ </configfiles> <help> -This is a utility to perform filtering operations on the rows and columns of a tabular file. +This is a utility to perform filtering operations on the rows and columns of a numeric matrix file. - The 'Column Eval Code' operations occur on the first line. - The 'Row Eval Code' operations occur on the first cell of every line
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/matrix_manipulate/quartile_norm.pl Mon Jun 24 13:37:27 2013 -0400 @@ -0,0 +1,102 @@
#!/usr/bin/perl

# quartile_norm.pl - quantile (by default upper-quartile) normalization of
# selected columns of a tab-separated file.
#
# For every column requested with --col, the values that are >= --min are
# sorted, the --quant percentile of those values is computed, and then EVERY
# value of the column (including the ones below --min) is rescaled as
#     target * value / quantile
# and printed with four decimals.  Columns requested with --also are copied
# through unchanged.  Output columns appear in ascending column order.

use strict;
use warnings;
use Getopt::Long;

# Run only when executed as a script, so the subs can be loaded and tested
# without triggering option parsing.
main() unless caller;

# Entry point: parses options, reads the table, normalizes, writes the result.
# Dies with the usage text on bad invocation and with a message on I/O errors
# or when a column cannot be normalized.
sub main {
    my $out    = '-';
    my $q      = 75;
    my @col;
    my @also;
    my $target = 1000;
    my $skip   = 0;
    my $min    = 1;

    GetOptions(
        "quant=i"  => \$q,
        "target=i" => \$target,
        "col=i@"   => \@col,
        "out=s"    => \$out,
        "also=i@"  => \@also,
        "skip=i"   => \$skip,
        "min=i"    => \$min,
    ) or die usage();

    my $in = shift @ARGV;

    die usage() unless defined $in && length $in && @col;
    die "Quantile must be between 0 and 100\n" if $q < 0 || $q > 100;

    # "-c -1" as the first column means "every column but the first".
    my $all_cols = ($col[0] == -1);

    my @bad = grep { $_ < 1 } (($all_cols ? () : @col), @also);
    die "Column numbers are 1-based, got: @bad\n" if @bad;

    # Open the input first so a bad input path does not truncate the output.
    my $in_fh;
    if ($in eq '-') {
        $in_fh = \*STDIN;
    }
    else {
        open($in_fh, '<', $in) or die "Can't open $in: $!\n";
    }

    my $out_fh;
    if ($out eq '-') {
        $out_fh = \*STDOUT;
    }
    else {
        open($out_fh, '>', $out) or die "Can't open $out: $!\n";
    }

    # By default carry the first column along unless it is being normalized.
    @also = (1) if !@also && !grep { $_ == 1 } @col;

    # Convert the 1-based column numbers to 0-based indices.
    $_-- for @col, @also;

    my @d;          # $d[column][row] for every collected column
    my @collect;    # sorted, de-duplicated indices of all collected columns
    my $cnt   = 0;  # number of data rows read
    my $head  = '';
    my $first = 1;

    while (my $line = <$in_fh>) {
        if ($skip > 0) {
            --$skip;
            $head .= $line;
            next;
        }
        chomp $line;
        my @f = split /\t/, $line;

        if ($first) {
            $first = 0;
            @col = (1 .. $#f) if $all_cols;

            # A column must be collected exactly once: collecting it twice
            # would interleave its values and shift every row.
            my %seen;
            @col     = grep { !$seen{$_}++ } @col;
            @collect = sort { $a <=> $b } (@col, grep { !$seen{$_}++ } @also);
        }

        push @{ $d[$_] }, $f[$_] for @collect;
        ++$cnt;
    }
    close($in_fh);

    # No data rows at all: nothing was expanded, nothing to normalize.
    @col = () if $first;

    for my $c (@col) {
        my $vals = $d[$c] or next;

        # Cells may be missing or non-numeric; they are deliberately coerced
        # the way Perl does it (to 0), without emitting warnings on stderr.
        no warnings qw(numeric uninitialized);

        my @kept = sort { $a <=> $b } grep { $_ >= $min } @{$vals};
        my $quant = quantile(\@kept, $q / 100);

        if (!defined $quant || $quant == 0) {
            die sprintf(
                "Can't normalize column %d: no values >= %s or quantile is zero\n",
                $c + 1, $min);
        }

        $_ = sprintf("%.4f", $target * $_ / $quant) for @{$vals};
    }

    print {$out_fh} $head;

    for my $i (0 .. $cnt - 1) {
        my @cells = map { defined $d[$_][$i] ? $d[$_][$i] : '' } @collect;
        print {$out_fh} join("\t", @cells), "\n";
    }

    # Buffered write errors only surface at close.
    close($out_fh) or die "Can't write $out: $!\n";

    return;
}

# Returns the usage/help text (the caller decides whether to die with it).
sub usage {
    return <<"EOF";
Usage: $0 -c COL [opts] FILE

Returns an upper quartile normalization of data in column(s) COL
of file FILE.

Col is 1-based, zeroes are ignored when calculating upper quartile

Options:
    -c|col COL      normalize this column of data (can specify more than once, or -1 for all but first col)
    -q|quant INT    quantile to use (75)
    -t|target INT   target to use (1000)
    -a|also COL     output these columns also
    -o|out FILE     output to this file instead of stdout
    -m|min INT      minimum value (1)
    -s|skip INT     skip header rows
EOF
}

# quantile(\@sorted, $p)
#   \@sorted  array ref of numbers in ascending order
#   $p        fraction in [0, 1]
# Returns the $p-quantile using linear interpolation between the two closest
# ranks (rank = (n - 1) * p), or undef for an empty array.
sub quantile {
    my ($sorted, $p) = @_;

    my $n = scalar @{$sorted};
    return unless $n;

    my $pos  = ($n - 1) * $p;
    my $lo   = int($pos);
    my $frac = $pos - $lo;
    my $v    = $sorted->[$lo];

    return $v if $frac <= 0 || $lo + 1 >= $n;

    # Weight by the fractional part of the rank, not by $p itself.
    return $v + $frac * ($sorted->[$lo + 1] - $v);
}