annotate 2.4/src/Bam2pair.pl @ 0:00b9898b8510 draft

Uploaded
author plus91-technologies-pvt-ltd
date Wed, 04 Jun 2014 03:41:27 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
1 #!/usr/bin/perl
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
2 #Author Steven Hart, PhD
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
3 #11-15-2012
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
4 #Convert and filter BAM files into merged bed
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
5 #Output should be
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
6 #chrA StartA EndA chrB StartB EndB Gene_id #supportingReads StrandA StrandB
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
7 #chr9 1000 5000 chr9 3000 3800 bedpe_example2 100 - +
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
8
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
9 use Cwd;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
10 use File::Basename;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
11 #Usage
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
12 sub usage(){
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
13 print "Usage: perl Bam2Pair.pl -b <BAM> -o <outfile>\n
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
14 -isize [10000]\t\tThe insert size to be considered discordant\n
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
15 -winsize [10000]\tThe distance between mate pairs to be considered the same\n
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
16 -min [1]\t\tThe minimum number of reads required to support an SV event\n
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
17 -prefix need a random prefix so files with the same name don't get created\n\n"
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
18 ;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
19 }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
20 $bedtools=`which intersectBed`;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
21 $samtools=`which samtools`;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
22
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
23 if(!defined($bedtools)){die "BEDtools must be installed\n";}
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
24 if(!defined($samtools)){die "Samtools must be installed\n";}
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
25 use Getopt::Long;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
26 #Declare variables
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
27 GetOptions(
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
28 'b=s' => \$BAM_FILE, #path to bam
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
29 'out=s' => \$outfile, #path to output
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
30 'java:s' => \$java ,
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
31 'chrom:s' => \$chrom ,
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
32 'isize=i' => \$isize,
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
33 'winsize=i' => \$winsize,
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
34 'prefix=s' => \$prefix,
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
35 'min=i' => \$minSupport,
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
36 'blacklist:s' => \$new_blacklist,
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
37 'q=s' => \$qual,
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
38 'v' => \$verbose
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
39 );
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
40 #if(defined($picard_path)){$picard_path=$picard_path} else {die "Must specify a path to PICARD so that files can be sorted and indexed properly\n"};
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
41 if(!defined($BAM_FILE)){die "Must specify a BAM file!\n".usage();}
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
42 if(!defined($outfile)){die "Must specify an out filename!\n".usage();}
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
43 if(!defined($java)){$java=$java;}else{$java=`which java`}
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
44 if(!defined($qual)){$qual=20}
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
45 if($new_blacklist){$new_blacklist=" -L $new_blacklist"}
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
46
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
47
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
48 $Filter_BAM=$BAM_FILE;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
49
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
50 @bam=split("/",$Filter_BAM);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
51 $Filter_BAM=@bam[@bam-1];
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
52 $Filter_BAM=~s/.bam/$prefix.bam/;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
53 $Filter_sam=$Filter_BAM;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
54 $Filter_sam=~s/.bam/.sam/;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
55
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
56
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
57
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
58
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
59 print "\nLooking for Discordant read pairs (and Unmated reads) without soft-clips\n";
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
60
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
61 #$command=join("","samtools view -h -q 20 -f 1 -F 1804 ",$BAM_FILE," ",$chrom," ",$new_blacklist," | awk -F\'\\t\' \'{if (\$9 > ", $isize, " || \$9 < -",$isize," || \$9 == 0 || \$1 ~ /^@/) print \$0}' > ",$Filter_sam);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
62
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
63
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
64 #Change command to allow reads where mate is unmapped & remove qual
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
65 $command=join("","samtools view -h -f 1 -F 1800 -q $qual ",$BAM_FILE," ",$chrom," ",$new_blacklist," | awk -F\'\\t\' \'{if (\$9 > ", $isize, " || \$9 < -",$isize," || \$9 == 0 || \$1 ~ /^@/) print \$0}' > ",$Filter_sam);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
66
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
67 print "$command\n" if ($verbose);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
68 system($command);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
69 $path = dirname(__FILE__);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
70 $Filter_cluster=$Filter_sam;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
71 $Filter_cluster=~s/.sam/.cluster/;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
72 $command=join("",$path,"/ReadCluster.pl -i=$Filter_sam -o=$Filter_cluster -m $minSupport");
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
73 if($verbose){print "\n$command\n"};
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
74
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
75 system($command);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
76
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
77 ##################################
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
78 #Now there are 2 SAM files of filtered reads
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
79 #.filter.cluster.inter.sam
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
80 #.filter.cluster.intra.sam
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
81 $result_pe=join("",$Filter_cluster,".out");
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
82 $command=join("","cat ",$Filter_cluster,".int\*|perl -ane 'next if(\@F[0]=~/^\@/);if(\@F[6]!~/=/){print join(\"\\t\",\$F[11],\@F[2],\@F[3],\@F[6],\@F[7],\"\\n\")}else{print join(\"\\t\",\$F[11],\@F[2],\@F[3],\@F[2],\@F[7],\"\\n\")}' >",$result_pe);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
83 if($verbose){print $command."\n"};
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
84 system($command);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
85 #my ($sample, $chrstart, $start, $chrend, $end)
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
86 $command=join("","cat ",$result_pe," | ",$path,"/cluster.pair.pl ",$winsize," |awk '(\$6 >",$minSupport,")' >> ", $outfile);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
87 if($verbose){print $command."\n"};
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
88 system($command);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
89 $filt1=join("",$Filter_cluster,".inter.sam");
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
90 $filt2=join("",$Filter_cluster,".intra.sam");
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
91
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
92
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
93 unlink($Filter_sam,$filt1,$filt2,$result_pe);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
94
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
95 #########################################
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
96 #Now determine if left or righ clipping surrogate
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
97 print "\nBam2pair.pl Done\n";
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
98