Mercurial > repos > plus91-technologies-pvt-ltd > ss_test_tool
comparison 2.4/script/Bam2pair.pl @ 0:00b9898b8510 draft
Uploaded
| author | plus91-technologies-pvt-ltd |
|---|---|
| date | Wed, 04 Jun 2014 03:41:27 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:00b9898b8510 |
|---|---|
| 1 #!/usr/bin/perl | |
| 2 #Author Steven Hart, PhD | |
| 3 #11-15-2012 | |
| 4 #Convert and filter BAM files into merged bed | |
| 5 #Output should be | |
| 6 #chrA StartA EndA chrB StartB EndB Gene_id #supportingReads StrandA StrandB | |
| 7 #chr9 1000 5000 chr9 3000 3800 bedpe_example2 100 - + | |
| 8 | |
| 9 use Cwd; | |
| 10 use File::Basename; | |
| 11 #Usage | |
| 12 sub usage(){ | |
| 13 print "Usage: perl Bam2Pair.pl -b <BAM> -o <outfile>\n | |
| 14 -isize [10000]\t\tThe insert size to be considered discordant\n | |
| 15 -winsize [10000]\tThe distance between mate pairs to be considered the same\n | |
| 16 -min [1]\t\tThe minimum number of reads required to support an SV event\n | |
| 17 -prefix need a random prefix so files with the same name don't get created\n\n" | |
| 18 ; | |
| 19 } | |
| 20 $bedtools=`which intersectBed`; | |
| 21 $samtools=`which samtools`; | |
| 22 | |
| 23 if(!defined($bedtools)){die "BEDtools must be installed\n";} | |
| 24 if(!defined($samtools)){die "Samtools must be installed\n";} | |
| 25 use Getopt::Long; | |
| 26 #Declare variables | |
| 27 GetOptions( | |
| 28 'b=s' => \$BAM_FILE, #path to bam | |
| 29 'out=s' => \$outfile, #path to output | |
| 30 'java:s' => \$java , | |
| 31 'chrom:s' => \$chrom , | |
| 32 'isize=i' => \$isize, | |
| 33 'winsize=i' => \$winsize, | |
| 34 'prefix=s' => \$prefix, | |
| 35 'min=i' => \$minSupport, | |
| 36 'blacklist:s' => \$new_blacklist, | |
| 37 'q=s' => \$qual, | |
| 38 'v' => \$verbose | |
| 39 ); | |
| 40 #if(defined($picard_path)){$picard_path=$picard_path} else {die "Must specify a path to PICARD so that files can be sorted and indexed properly\n"}; | |
| 41 if(!defined($BAM_FILE)){die "Must specify a BAM file!\n".usage();} | |
| 42 if(!defined($outfile)){die "Must specify an out filename!\n".usage();} | |
| 43 if(!defined($java)){$java=$java;}else{$java=`which java`} | |
| 44 if(!defined($qual)){$qual=20} | |
| 45 if($new_blacklist){$new_blacklist=" -L $new_blacklist"} | |
| 46 | |
| 47 | |
| 48 $Filter_BAM=$BAM_FILE; | |
| 49 | |
| 50 @bam=split("/",$Filter_BAM); | |
| 51 $Filter_BAM=@bam[@bam-1]; | |
| 52 $Filter_BAM=~s/.bam/$prefix.bam/; | |
| 53 $Filter_sam=$Filter_BAM; | |
| 54 $Filter_sam=~s/.bam/.sam/; | |
| 55 | |
| 56 | |
| 57 | |
| 58 | |
| 59 print "\nLooking for Discordant read pairs (and Unmated reads) without soft-clips\n"; | |
| 60 | |
| 61 #$command=join("","samtools view -h -q 20 -f 1 -F 1804 ",$BAM_FILE," ",$chrom," ",$new_blacklist," | awk -F\'\\t\' \'{if (\$9 > ", $isize, " || \$9 < -",$isize," || \$9 == 0 || \$1 ~ /^@/) print \$0}' > ",$Filter_sam); | |
| 62 | |
| 63 | |
| 64 #Change command to allow reads where mate is unmapped & remove qual | |
| 65 $command=join("","samtools view -h -f 1 -F 1800 -q $qual ",$BAM_FILE," ",$chrom," ",$new_blacklist," | awk -F\'\\t\' \'{if (\$9 > ", $isize, " || \$9 < -",$isize," || \$9 == 0 || \$1 ~ /^@/) print \$0}' > ",$Filter_sam); | |
| 66 | |
| 67 print "$command\n" if ($verbose); | |
| 68 system($command); | |
| 69 $path = dirname(__FILE__); | |
| 70 $Filter_cluster=$Filter_sam; | |
| 71 $Filter_cluster=~s/.sam/.cluster/; | |
| 72 $command=join("",$path,"/ReadCluster.pl -i=$Filter_sam -o=$Filter_cluster -m $minSupport"); | |
| 73 if($verbose){print "\n$command\n"}; | |
| 74 | |
| 75 system($command); | |
| 76 | |
| 77 ################################## | |
| 78 #Now there are 2 SAM files of filtered reads | |
| 79 #.filter.cluster.inter.sam | |
| 80 #.filter.cluster.intra.sam | |
| 81 $result_pe=join("",$Filter_cluster,".out"); | |
| 82 $command=join("","cat ",$Filter_cluster,".int\*|perl -ane 'next if(\@F[0]=~/^\@/);if(\@F[6]!~/=/){print join(\"\\t\",\$F[11],\@F[2],\@F[3],\@F[6],\@F[7],\"\\n\")}else{print join(\"\\t\",\$F[11],\@F[2],\@F[3],\@F[2],\@F[7],\"\\n\")}' >",$result_pe); | |
| 83 if($verbose){print $command."\n"}; | |
| 84 system($command); | |
| 85 #my ($sample, $chrstart, $start, $chrend, $end) | |
| 86 $command=join("","cat ",$result_pe," | ",$path,"/cluster.pair.pl ",$winsize," |awk '(\$6 >",$minSupport,")' >> ", $outfile); | |
| 87 if($verbose){print $command."\n"}; | |
| 88 system($command); | |
| 89 $filt1=join("",$Filter_cluster,".inter.sam"); | |
| 90 $filt2=join("",$Filter_cluster,".intra.sam"); | |
| 91 | |
| 92 | |
| 93 unlink($Filter_sam,$filt1,$filt2,$result_pe); | |
| 94 | |
| 95 ######################################### | |
| 96 #Now determine if left or righ clipping surrogate | |
| 97 print "\nBam2pair.pl Done\n"; | |
| 98 |
