Mercurial > repos > plus91-technologies-pvt-ltd > ss_test_tool
comparison 2.4/script/Check_integration.sh @ 0:00b9898b8510 draft
Uploaded
| author | plus91-technologies-pvt-ltd |
|---|---|
| date | Wed, 04 Jun 2014 03:41:27 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:00b9898b8510 |
|---|---|
| 1 #!/bin/sh | |
| 2 #$ -V | |
| 3 #$ -cwd | |
| 4 #$ -q 1-day | |
| 5 #$ -m ae | |
| 6 #$ -M hart.steven@mayo.edu | |
| 7 #$ -l h_vmem=8G | |
| 8 #$ -l h_stack=10M | |
| 9 VCF_FILE=$1 | |
| 10 x=$2 | |
| 11 #VIRAL_SEQDB=/data2/bsi/tertiary/m110344/SoftTile/Mia/BLAST_DB/OBrien/Virus_PCGS.fasta #must me indexed by bwasw | |
| 12 VIRAL_SEQDB=$3 | |
| 13 VCF_FILE=$4 | |
| 14 #VCF_FILE=final.vcf | |
| 15 | |
| 16 set -x | |
| 17 | |
| 18 perl -ane '$dist=100; | |
| 19 $mate=$F[4]; | |
| 20 $mate=~s/[A-Z]|\[|\]//g; | |
| 21 @mate=split(/:/,$mate); | |
| 22 $end1a=@F[1]-$dist; | |
| 23 $end1b=@F[1]+$dist; | |
| 24 $end2a=$dist+@mate[1]; | |
| 25 $end2b=$dist+@mate[1]; | |
| 26 print "@F[0]\t$end1a\t$end1b\n@mate[0]\t$end2a\t$end2b\n"' $VCF_FILE| | |
| 27 sortBed > targets.bed | |
| 28 | |
| 29 | |
| 30 #100 min | |
| 31 time samtools view -h $x -L targets.bed |awk '(($9==0)&&($11!~/#/)&&($3!~/^chrGL/)&&($3!~/^chrM/))'|perl -ane 'print "\@@F[0]\n@F[9]\n+\n@F[10]\n"' >${x%%.bam}.res.fq | |
| 32 #23 min | |
| 33 time bwa mem -t 4 $VIRAL_SEQDB ${x%%.bam}.res.fq |samtools view -S - |grep gi > ${x%%.bam}.tmp.sam | |
| 34 | |
| 35 #find out how many hits there are | |
| 36 cut -f3 ${x%%.bam}.tmp.sam|perl -ne '@_=split(":",$_);@res=split(/_/,@_[1],2);print "@res[1]"' | sort|uniq -c|sort -k1nr|tee ${x%%.bam}.Viral_maps.out |head | |
| 37 #Get the reads mapping to those hits to find out where the integration site is | |
| 38 | |
| 39 #Read in the viruses until there is a significant drop off in number of reads (i.e. contributing less than 10%) | |
| 40 perl -ne '@_=split(" ",$_);$i=$_[0]+$i;$j=$_[0];$res=$j/$i;if($res>.1){print "@_[1]\n"}' ${x%%.bam}.Viral_maps.out >${x%%.bam}.to.keep | |
| 41 fgrep -f ${x%%.bam}.to.keep ${x%%.bam}.tmp.sam |cut -f1 >${x%%.bam}.reads | |
| 42 | |
| 43 #75min+ | |
| 44 | |
| 45 time samtools view $x -L targets.bed | | |
| 46 fgrep -f ${x%%.bam}.reads| | |
| 47 awk '{if(($9==0)&&($11!~/#/)&&($3!~/^chrGL/)&&($3!~/^chrM/)&&($3!~/^\*/)){print $3"\t"$4"\t"$4"\t"$1}}'| | |
| 48 tee ${x%%.bam}.unsorted.bed| | |
| 49 sortBed | mergeBed -nms -d 1000| | |
| 50 perl -e 'open (FILE,"$ARGV[0]") or die "cant open file\n\n"; | |
| 51 $SAM="$ARGV[1]"; | |
| 52 $SAM=~chomp; | |
| 53 while(<FILE>){ | |
| 54 chomp; | |
| 55 @_=split(/\t/,$_); | |
| 56 @reads=split(/;/,@_[3]); | |
| 57 #print "LINE=$_\nRES=grep $reads[0] $SAM\n"; | |
| 58 $res=`grep $reads[0] $SAM` ; | |
| 59 # print "AFTER GREP, RES=$res\n"; | |
| 60 if($res){ | |
| 61 @res=split(/\t/,$res); | |
| 62 print join("\t",@_[0..2],@res[2])."\n" | |
| 63 } | |
| 64 }; | |
| 65 close FILE' - ${x%%.bam}.tmp.sam | | |
| 66 perl -ne 's/\|/\t/g;@_=split("\t",$_);print join ("\t",@_[0..2,7])'| | |
| 67 perl -pne 's/_/\t/'| cut -f4 --complement | | |
| 68 perl -e ' | |
| 69 open (FILE,"$ARGV[0]") or die "cant open file\n\n"; | |
| 70 $SAM="$ARGV[1]"; | |
| 71 while(<FILE>){ | |
| 72 chomp; | |
| 73 @_=split(/\t/,$_); | |
| 74 $res=`grep $_[3] $SAM`; | |
| 75 if($res){ | |
| 76 @res=split(" ",$res); | |
| 77 $reads[0]=chomp; | |
| 78 print join("\t",@_[0..4],@res[0])."\n"; | |
| 79 } | |
| 80 } | |
| 81 close FILE; | |
| 82 ' - ${x%%.bam}.Viral_maps.out| | |
| 83 perl -pne 's/_/ /g'> ${x%%.bam}.Virus.integrated.bed | |
| 84 | |
| 85 rm ${x%%.bam}.reads ${x%%.bam}.to.keep ${x%%.bam}.tmp.sam ${x%%.bam}.res.fq | |
| 86 echo "DONE with $x" |
