#!/usr/bin/perl -w
# HG changeset patch
# User devteam
# Date 1377005975 14400
# Node ID 137ec95c8ca6f3a1a70fddd3ce7496b327217851
# Uploaded tool tarball.
#
# diff -r 000000000000 -r 137ec95c8ca6 dividePgSnpAlleles.pl
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/dividePgSnpAlleles.pl Tue Aug 20 09:39:35 2013 -0400
use strict;
use warnings;

# Divide the alleles and their information into separate columns for
# pgSnp-like files. Keep any additional columns beyond the pgSnp ones.
#
# Usage: dividePgSnpAlleles.pl [-ref=N] [FILE]
#   -ref=N  1-based column holding the reference allele; it is used as the
#           second allele of homozygous sites. Omitted or 0: use "N".
#   FILE    input file; when omitted (or "-") the script reads from stdin.
# Output is written to stdout.

# Convert one pgSnp line (already chomped) into the divided layout:
#   chrom, start, end, allele1, freq1, score1, allele2, freq2, score2,
#   followed by any columns beyond the seventh.
# $ref is the 0-based index of the reference-allele column, or undef.
# Returns the output line without a trailing newline, or nothing (undef in
# scalar context) when the line has fewer than 7 columns or more than
# 2 alleles.
sub divide_pgsnp_line {
    my ($line, $ref) = @_;

    my @f = split(/\t/, $line);
    return if @f < 7;       # not a complete pgSnp record
    return if $f[4] > 2;    # skip those with more than 2 alleles

    my @a  = split(/\//, $f[3]);
    my @fr = split(/,/,  $f[5]);
    my @sc = split(/,/,  $f[6]);

    if ($f[4] == 1) {
        # Homozygous: add a second allele with frequency 0 and score 0.
        # Test definedness, not truth, so that column 1 (index 0) works.
        my $second = "N";
        if (defined $ref && defined $f[$ref]) { $second = $f[$ref]; }
        push(@a,  $second);
        push(@fr, 0);
        push(@sc, 0);
    }

    # The slice @f[7 .. $#f] is empty when there are no extra columns.
    return join("\t",
        @f[0 .. 2],
        $a[0], $fr[0], $sc[0],
        $a[1], $fr[1], $sc[1],
        @f[7 .. $#f]);
}

# Parse the command line, then stream the input through
# divide_pgsnp_line(), printing every line that is not skipped.
# Dies if the input file cannot be opened. Returns 0.
sub main {
    my @args = @_;

    my $ref;
    if (@args && $args[0] =~ /-ref=(\d+)/) {
        $ref = $1 - 1;
        if ($ref == -1) { undef $ref; }    # -ref=0 means "no reference column"
        shift @args;
    }
    my $in = shift @args;

    my $fh;
    if (defined $in && $in ne '-') {
        open($fh, '<', $in) or die "Couldn't open $in, $!\n";
    }
    else {
        $fh = \*STDIN;    # documented default: read from stdin
    }

    while (my $line = <$fh>) {
        chomp $line;
        my $out = divide_pgsnp_line($line, $ref);
        if (defined $out) {
            print "$out\n";
        }
        elsif ($line =~ /\S/ && scalar(split(/\t/, $line)) < 7) {
            warn "Skipping line $.: fewer than 7 columns\n";
        }
    }

    if (defined $in && $in ne '-') { close $fh; }
    return 0;
}

# Run only when executed as a script, so the subs can be loaded for testing.
main(@ARGV) unless caller;

1;

# Everything below __END__ is the remainder of the changeset patch (other
# files and test data); perl does not parse it.
__END__
diff -r 000000000000 -r 137ec95c8ca6 dividePgSnpAlleles.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dividePgSnpAlleles.xml Tue Aug 20 09:39:35 2013 -0400
@@ -0,0 +1,76 @@
+
diff -r 000000000000 -r 137ec95c8ca6 dividePgSnpAlleles.xml.bak
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dividePgSnpAlleles.xml.bak Tue Aug 20 09:39:35 2013 -0400
@@ -0,0 +1,76 @@
+
diff -r 000000000000 -r 137ec95c8ca6 test-data/dividePgSnp_input.pgSnp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dividePgSnp_input.pgSnp Tue Aug 20 09:39:35 2013 -0400
@@ -0,0 +1,10 @@
+chr1 256 257 A/C 2 4,5 0,0
+chr1 56100 56101 A 1 8 0
+chr1 77052 77053 A/G 2 3,5 0,0
+chr1 110904 110905 A 1 5 0
+chr1 160592 160593 G 1 3 0 +chr1 640353 640354 G 1 1 0 +chr1 695314 695315 A 1 7 0 +chr1 713681 713682 A 1 8 0 +chr1 713965 713966 A/G 2 3,2 0,0 +chr1 714056 714057 A/G 2 1,5 0,0 diff -r 000000000000 -r 137ec95c8ca6 test-data/dividePgSnp_output.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/dividePgSnp_output.txt Tue Aug 20 09:39:35 2013 -0400 @@ -0,0 +1,10 @@ +chr1 256 257 A 4 0 C 5 0 +chr1 56100 56101 A 8 0 N 0 0 +chr1 77052 77053 A 3 0 G 5 0 +chr1 110904 110905 A 5 0 N 0 0 +chr1 160592 160593 G 3 0 N 0 0 +chr1 640353 640354 G 1 0 N 0 0 +chr1 695314 695315 A 7 0 N 0 0 +chr1 713681 713682 A 8 0 N 0 0 +chr1 713965 713966 A 3 0 G 2 0 +chr1 714056 714057 A 1 0 G 5 0