Mercurial > repos > geert-vandeweyer > clusterfast
view cf_scripts/filter_contigs.0403.pl @ 19:f51dd12f2263 draft
Uploaded
author | geert-vandeweyer |
---|---|
date | Mon, 28 Jul 2014 13:34:56 -0400 |
parents | 4a3afa90ff7f |
children |
line wrap: on
line source
#!/usr/bin/perl use strict; my $tol=300; my $assembler=$ARGV[3]; #read in .psl file (blat output), open(PSL, "$ARGV[0]") or die "Can't open .psl.\n"; my %blat_results; #hash blat results by query name while(my $line=<PSL>) { chomp($line); my ($match, $mm, $rm, $nn, $qgpct, $qgpb, $tgpct, $tgpb, $strand, $qname, $qsize, $qstart, $qend, $tname, $tsize, $tstart, $tend, $stuff)=split(/\s+/, $line); if( ! exists $blat_results{$qname}) { $blat_results{$qname}=(); } push (@{$blat_results{$qname}}, $line); } close(PSL); my %good_contigs; my %good_blats; #read in cf output open(QP, "$ARGV[1]"); while(my $line=<QP>) { chomp($line); my $pf="pass"; my ($x, $chr1, $pos1, $chr2, $pos2, $or, $ct, $splct)=split(/\s+/, $line); if($pf eq "pass") { foreach my $key(keys %blat_results) { my @score=(0,0); my @blat_out=(); for(my $j=0; $j<@{$blat_results{$key}}; $j++) { my ($match, $mm, $rm, $nn, $qgpct, $qgpb, $tgpct, $tgpb, $strand, $qname, $qsize, $qstart, $qend, $tname, $tsize, $tstart, $tend, $stuff)=split(/\s+/, $blat_results{$key}[$j]); if(($tname eq $chr1) && ($pos1>$tstart-$tol)&&($pos1<$tend+$tol)) { $score[0]++; push(@blat_out, $blat_results{$key}[$j]); } if(($tname eq $chr2) && ($pos2>$tstart-$tol)&&($pos2<$tend+$tol)) { $score[1]++; push(@blat_out, $blat_results{$key}[$j]); } } if($score[0]>0 && $score[1]>0) { # if there is a contig with blat results corresponding to both sides of the cf result $good_contigs{$key}=$line; @{$good_blats{$key}}=@blat_out; } } } } close(QP); my %all_ctgs; print STDOUT "*********************************************\n$assembler results\n\n"; if(keys(%good_contigs)>0) { # read in fasta file containing contigs open(CT, "$ARGV[2]") or die "Can't open contigs.\n"; my $curr_ctg=""; while (my $line=<CT>) { chomp($line); if(substr($line, 0, 1) eq ">") { $curr_ctg=substr($line,1); $all_ctgs{$curr_ctg}=(); #read each contig into its own array } else { push(@{$all_ctgs{$curr_ctg}}, $line); } } close(CT); foreach my $key(keys %all_ctgs) { if(exists $good_contigs{$key}) { my $ctg=""; for(my $k=0; $k<@{$all_ctgs{$key}}; $k++) { $ctg.=$all_ctgs{$key}[$k]; } foreach(@{$good_blats{$key}}) { my ($match, $mm, $rm, $nn, $qgpct, $qgpb, $tgpct, $tgpb, $strand, $qname, $qsize, $qstart, $qend, $tname, $size, $tstart, $tend, $stuff)=split(/\s+/, $_); print STDOUT "$qname\t$qstart\t$qend\t$tname\t$tstart\t$tend\t$strand\n"; } print STDOUT "$key $good_contigs{$key}\n$ctg\n"; } } }