Mercurial > repos > greg > snpeff_v2_from_pablo
diff snpEff_2_1a/scripts/joinSnpEff.pl @ 0:f8eaa3f8194b default tip
Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
author | greg |
---|---|
date | Fri, 20 Apr 2012 14:47:09 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/snpEff_2_1a/scripts/joinSnpEff.pl Fri Apr 20 14:47:09 2012 -0400 @@ -0,0 +1,125 @@ +#!/usr/bin/perl + +#------------------------------------------------------------------------------ +# +# Mark snps as X1, X2 or 'Both' +# +#------------------------------------------------------------------------------ + +use strict; + +my($debug) = 0; + +#------------------------------------------------------------------------------ +# Read a file and index lines by SNP +#------------------------------------------------------------------------------ +sub readSnps($) { + my($file) = (@_); + my($l, %snps); + + open SNP, $file || die "Cannot open file '$file'\n"; + while( $l = <SNP> ) { + my($chr, $pos, $ref, $var) = split /\t/, $l; + my($snp) = "$chr:$pos\_$ref/$var"; + $snps{$snp} .= $l; + } + close SNP; + return %snps; +} + +#------------------------------------------------------------------------------ +# Print SNP info and quals +#------------------------------------------------------------------------------ +sub printLine($$$$) { + my($snp, $lines, $quals, $q) = (@_); + my($line, @lines); + (@lines) = split '\n', $lines; + foreach $line ( @lines ) { + my($l) = replaceSnpQ($line, $q); + print "$l\t$quals\n"; + } +} + +#------------------------------------------------------------------------------ +# Parse snp quality parameter +#------------------------------------------------------------------------------ +sub parseSnpQ($) { + my($l) = @_; + my(@t); + (@t) = split /\t/,$l; + return $t[6]; +} + +#------------------------------------------------------------------------------ +# Replace a quality +#------------------------------------------------------------------------------ +sub replaceSnpQ($$) { + my($line, $q) = @_; + my(@t); + (@t) = split /\t/, $line; + $t[1] = $q; + return join("\t", @t); +} + +#------------------------------------------------------------------------------ +# Main +#------------------------------------------------------------------------------ +# Read arguments +my(@file); +(@file) = @ARGV; +if( $#file <= 0 ) { die "Usage: ./joinSnpEff.pl tag1 file1 tag2 file2 ... tagN fileN\n"; } + +# Parse arguments +print STDERR "Reading files:\n"; +my($i, $j, $file, $tag, $snp, @snpsAll, %snps, @tags); +for( $i=0 , $j=0 ; $i < $#ARGV ; $i+=2, $j++ ) { + # Read tag + $tags[$j] = $tag = $ARGV[$i]; + + # Read file + $file = $ARGV[$i+1]; + if( $file eq '' ) { die "Missing file for tag '$tag'\n"; } + print STDERR "\tTags[$j]: $tag\t'$file'\n"; + %snps = readSnps($file); + + # Add all snps + foreach $snp ( keys %snps ) { $snpsAll[$j]->{$snp} = $snps{$snp}; } +} + +#--- +# Print SNPS +#--- +my($snp, %done, %snpsi); +my($j, $jj); +$i = 0; +print STDERR "Joining SNP from all files\n"; +for( $i=0 ; $i <= $#tags ; $i++ ) { # For all tags + print "TAG:\ttags[$i] = '$tags[$i]'\n" if $debug; + my($uniq, $shared) = (0, 0); + %snpsi = %{$snpsAll[$i]}; + foreach $snp (sort keys %snpsi) { # For all snps... + if( ! $done{$snp} ) { # Not done yet? + + # Get qualities from all SNPs + my($quals, $qSum, $qCount) = ("", 0, 0); + my($all) = "ALL "; + for( $j=0, $jj=1 ; $j <= $#snpsAll ; $j++ , $jj++ ) { + if( exists $snpsAll[$j]{$snp} ) { + my($q) = parseSnpQ($snpsAll[$j]->{$snp}); + $quals .= "$tags[$j]:$q "; + $qSum += $q; + $qCount++; + } else { $all = ""; } + } + + if( $qCount <= 1 ) { $uniq++; } # Count unique SNPs for this file + else { $shared++; } + + $done{$snp} = 1; + my($qAvg) = ( $qCount > 0 ? int($qSum/$qCount) : 0); + printLine($snp, $snpsi{$snp}, "$all $quals", $qAvg); + } else { $shared++; } + } + print STDERR "\tTags[$i]: $tags[$i]\tUnique / Shared snps: $uniq / $shared\n"; +} +