annotate run_cf0611.pl @ 1:4a3afa90ff7f draft

Uploaded
author geert-vandeweyer
date Mon, 28 Jul 2014 05:53:55 -0400
parents
children 233be956ae78
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
1 #!/usr/bin/perl
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
2
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
3 #Copyright 2012-2013 Haley Abel
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
4 #
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
5 #This file is part of ClusterFAST.
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
6 #
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
7 #ClusterFAST is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
8 #
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
9 #ClusterFAST is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
10 #
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
11 #You should have received a copy of the GNU General Public License along with ClusterFAST. If not, see <http://www.gnu.org/licenses/>.
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
12
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
13
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
14 use warnings;
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
15 use strict;
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
16
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
17 use Getopt::Long;
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
18 use Pod::Usage;
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
19 use File::Path qw(make_path);
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
20 use Cwd ;
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
21 use Cwd 'abs_path';
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
22
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
# Command-line settings. Everything without a default below stays undefined
# until GetOptions (or the --refpaths split) fills it in.
my ($inbam, $outdir, $targets, $minct1, $minct2, $dist, $novoindex, $rmtmp,
    $FASTA, $twobit, $refpaths, $bp1, $bp2, $contigs);

$minct1 = 2;        # minimum number of read pairs supporting a breakpoint
$minct2 = 1;        # minimum number of split reads supporting a breakpoint
$dist   = 50000;    # maximum distance from a target to search
$rmtmp  = 1;        # remove the temp directory when done (true by default)

# Not referenced anywhere in the visible part of this script.
my $known_partners = 0;
my $partnerfile    = '';

# The helper scripts and the ClusterFAST jar are looked up relative to the
# directory the script is started from (getcwd at startup), not relative to
# the location of this file.
my $cwd         = getcwd();
my $scriptspath = "$cwd/cf_scripts";
my $cffile      = "$cwd/cf_v1.1.jar";
my $help        = '';

# YOU MUST ADD LOCAL FILE PATHS HERE FOR STANDALONE USE (including trailing
# slashes!). Galaxy adds the binaries to the path, so these can be left empty.
#
# Each statement is terminated BEFORE its trailing comment: a ';' written
# after '#' is part of the comment and leaves the assignment unterminated,
# which is a compile-time syntax error.
my $SAMTOOLS  = '';    # INSERT PATH TO SAMTOOLS; http://sourceforge.net/projects/samtools/files/samtools/
my $NOVOALIGN = '';    # INSERT PATH TO NOVOALIGN; http://www.novocraft.com/main/downloadpage.php
my $VELVET    = '';    # INSERT PATH TO VELVET; http://www.molecularevolution.org/software/genomics/velvet
my $BLAT      = '';    # INSERT PATH TO BLAT gfServer/gfClient tools
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
43
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
44
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
45
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
46
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
# One usage line shared by every exit path, so the copies cannot drift apart.
# The 2bit genome is given with -2/--twobit; -t is the targets file.
my $usage = "Usage: run_cf_0611.pl -b inbam -o outdir -f fasta -t targets -d distance -i novoindex -r rmtmp -m1 min_pairs -m2 min_splits -2 genome.2bit [-p refpaths] [-c contigs] [-y bp1] [-z bp2]\n";

GetOptions(
    "b|inbam=s"     => \$inbam,
    "o|outdir=s"    => \$outdir,
    "t|targets=s"   => \$targets,
    "f|fasta=s"     => \$FASTA,
    "m1|minct1=i"   => \$minct1,
    "m2|minct2=i"   => \$minct2,
    "d|distance=i"  => \$dist,
    "i|novoindex=s" => \$novoindex,
    "r|rmtmp=i"     => \$rmtmp,
    "h|help"        => \$help,
    "2|twobit=s"    => \$twobit,
    "p|refpaths=s"  => \$refpaths,
    "c|contigs=s"   => \$contigs,
    "y|bp1=s"       => \$bp1,
    "z|bp2=s"       => \$bp2,
) or die $usage;

if ($help) {
    print $usage;
    print "targets=BED style interval file containing the regions to search for SV\n";
    print "fasta=reference fasta file\nnovoindex=reference index for novoalign\n";
    print "distance=max distance from target to search (default=50000)\n";
    print "min_pairs=minimum number of read pairs supporting breakpoint (default=2)\nmin_splits=minimum number of split reads supporting breakpoint (default=1)\n";
    print "rmtmp=remove temp directory (default=1, true)\n";
    print "genome.2bit = 2bit index fasta file of reference genome, used by the blat server (which is launched on the localhost)\n";
    print "refpaths = semicolon seperated list of reference genomes paths (used by galaxy). The order is mandatory to be novoalign;2bit;fasta\n";
    print "contigs = (Optional) output file for the contigs (default: outdir/contigs.txt)\n";
    print "bp1 = (Optional) output file for the breakpoints.1.txt file (default: outdir/breakpoints.1.txt)\n";
    print "bp2 = (Optional) output file for the breakpoints.2.txt file (default: outdir/breakpoints.2.txt)\n";
    # Exit status 1 is kept unchanged for callers that rely on it.
    exit(1);
}

# If refpaths is given, split it into the separate reference variables.
# It overrides -i/-2/-f; a list with fewer than three entries leaves the
# trailing variables undefined, which the check below reports as a usage error.
if (defined($refpaths) && $refpaths ne '') {
    ($novoindex, $twobit, $FASTA) = split(/;/, $refpaths);
}

# Input files that must be named on the command line AND exist on disk.
# The output directory only has to be named; it is created later.
my @required_files = (
    [ 'inbam',     $inbam ],
    [ 'targets',   $targets ],
    [ 'fasta',     $FASTA ],
    [ 'novoindex', $novoindex ],
    [ 'twobit',    $twobit ],
);

my @unset = grep { !defined $_->[1] } @required_files;
if (!defined $outdir || @unset) {
    print STDERR $usage;
    exit(1);
}

my @missing = grep { !-e $_->[1] } @required_files;
if (@missing) {
    print STDERR "Critical file missing.\n";
    # Name the offending option(s) so the user does not have to guess.
    print STDERR "  $_->[0]: $_->[1]\n" for @missing;
    exit(1);
}
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
93
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
# Empty result files are created first so they exist even when the run stops
# early. abs_path() is called afterwards because the directory must exist.
make_empty_output();
$outdir = abs_path($outdir);
defined $outdir
    or die "Cannot resolve the output directory to an absolute path: $!\n";

# Wrap a value in single quotes for /bin/sh so that paths containing spaces
# or shell metacharacters survive interpolation into a command string.
my $shell_quote = sub {
    my ($text) = @_;
    $text =~ s/'/'\\''/g;
    return "'$text'";
};

# Echo a shell command, run it, and report a failure instead of ignoring it.
# With a true second argument a failure aborts the script; otherwise it is
# only reported on STDERR and the pipeline continues as it did before.
# Returns true when the command exited with status 0.
my $run_step = sub {
    my ($command, $fatal) = @_;
    print STDOUT "$command\n";
    return 1 if system($command) == 0;

    my $reason;
    if ($? == -1) {
        $reason = "could not be started: $!";
    }
    elsif ($? & 127) {
        $reason = 'was killed by signal ' . ($? & 127);
    }
    else {
        $reason = 'exited with status ' . ($? >> 8);
    }
    my $message = "Command $reason: $command\n";
    die $message if $fatal;
    warn $message;
    return 0;
};

## convert Targets.bed to targets.picard: BAM header followed by the BED
## columns 1,2,3,6,4 (tab separated).
my $picard_targets = "$outdir/Targets.picard";
$run_step->(
    "${SAMTOOLS}samtools view -H " . $shell_quote->($inbam)
        . ' > ' . $shell_quote->($picard_targets),
    1
);
$run_step->(
    q{awk '{OFS="\t"; print $1,$2,$3,$6,$4 }' } . $shell_quote->($targets)
        . ' >> ' . $shell_quote->($picard_targets),
    1
);
$targets = $picard_targets;

## check all reads are paired. Combination of paired and single end libraries crashes the tool.
my $quoted_inbam = $shell_quote->($inbam);
my @flagstat     = `${SAMTOOLS}samtools flagstat $quoted_inbam`;
my $total        = 0;
my $paired       = 0;
for my $line (@flagstat) {
    if ($line =~ m/^(\d+).*in total/) {
        $total = $1;
    }
    elsif ($line =~ m/^(\d+).*paired in sequencing/) {
        $paired = $1;
    }
}
if ($total != $paired) {
    print "Removing single-read data from the input bam file\n";
    my $paired_bam = "$outdir/Paired.only.bam";
    $run_step->(
        "${SAMTOOLS}samtools view -f 1 -h -b -o " . $shell_quote->($paired_bam)
            . ' ' . $shell_quote->($inbam),
        1
    );
    $inbam = $paired_bam;
}

## process: stage 1 (trailing "1" is the stage argument of the jar).
# NOTE(review): the jar's exit-status convention is not visible here, so a
# non-zero status is only reported; the toRemap1.fq check below decides.
$run_step->(
    join(' ',
        'java -Xmx8g -Xms6g -jar', $shell_quote->($cffile),
        $shell_quote->($inbam),    $shell_quote->($targets),
        $minct1, $dist, $shell_quote->($outdir), '1')
);

# Test with an absolute path so the result does not depend on whether the
# temp directory exists / a chdir succeeded.
my $tempdir = "$outdir/temp";
if (!(-s "$tempdir/toRemap1.fq")) {
    print STDOUT "No breakpoints found.\n";
    chdir($outdir) or die "Cannot chdir to $outdir: $!\n";
    clean_up();
    exit(1);
}

# The remaining steps (and assemble()) use file names relative to temp.
chdir($tempdir) or die "Cannot chdir to $tempdir: $!\n";

$run_step->(
    "${NOVOALIGN}novoalign -o SAM -i 230 140 -r all -e 999 -c2 -d "
        . $shell_quote->($novoindex)
        . ' -F STDFQ -f toRemap1.fq toRemap2.fq > novoout.2.sam'
);

## process: stage 2 on the re-aligned reads.
$run_step->(
    join(' ',
        'java -Xmx6g -Xms4g -jar', $shell_quote->($cffile),
        'novoout.2.sam',           $shell_quote->($targets),
        $minct2, $dist, $shell_quote->($outdir), '2')
);

print STDOUT "Assembling contigs...\n";
assemble();
print STDOUT "Done\n";

# temp lives inside $outdir, so a plain rename replaces the empty
# placeholder contigs.txt created at startup.
rename('contigs.txt', "$outdir/contigs.txt")
    or warn "Could not move contigs.txt to $outdir: $!\n";

# clean_up() expects to be called from inside the output directory.
chdir($outdir) or die "Cannot chdir to $outdir: $!\n";
clean_up();
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
157
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
158
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
159
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
160
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
161 #########################
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
162
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
# Create the output directory (including missing parents) and an empty
# breakpoints.1.txt, breakpoints.2.txt and contigs.txt inside it, truncating
# any existing files of those names.
#
# Reads the file-level $outdir; takes no arguments and returns nothing.
# Dies with the system error text when a file cannot be created or closed.
sub make_empty_output {
    make_path($outdir);

    for my $name (qw(breakpoints.1.txt breakpoints.2.txt contigs.txt)) {
        my $file = "${outdir}/$name";
        # Three-argument open with a lexical handle: the path can never be
        # mistaken for a mode, and the handle cannot leak into other code.
        open(my $fh, '>', $file) or die "Can't open $file: $!";
        close($fh)               or die "Can't close $file: $!";
    }

    return;
}
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
170
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
# Assemble the breakpoint-supporting reads into contigs and filter them.
#
# Must be called with the current directory set to "$outdir/temp": every
# intermediate file is addressed by a relative name. Steps:
#   1. split merged.sorted.final.temp.bam into pair1.fq/pair2.fq/single.fq
#   2. assemble with velveth/velvetg into velvet_out
#   3. start a local BLAT server (gfServer) on a random port, align
#      velvet_out/contigs.fa against $twobit with gfClient, stop the server
#   4. run filter_contigs.0403.pl and leave its output in contigs.txt
#
# Reads the file-level $SAMTOOLS, $VELVET, $BLAT, $scriptspath, $twobit and
# $outdir. Takes no arguments and returns nothing.
# When no reads are available for assembly it delivers the (empty) outputs
# through clean_up() and exits with status 1. Dies when no usable port is
# found or the BLAT server does not come online in time.
sub assemble {
    # Single-quote a path for /bin/sh (spaces, metacharacters, quotes).
    my $shell_quote = sub {
        my ($text) = @_;
        $text =~ s/'/'\\''/g;
        return "'$text'";
    };

    # Run a shell command and report (but do not abort on) a failure, so a
    # broken step is visible on STDERR instead of passing silently.
    my $run_step = sub {
        my ($command) = @_;
        return 1 if system($command) == 0;
        my $reason = $? == -1 ? "could not be started: $!" : "failed (wait status $?)";
        warn "Command $reason: $command\n";
        return 0;
    };

    # Combined stdout/stderr of "gfServer status" for a port. The text
    # contains "Error" when no server answers on that port.
    my $server_status = sub {
        my ($candidate) = @_;
        my $output = `${BLAT}gfServer status 127.0.0.1 $candidate 2>&1`;
        return defined $output ? $output : '';
    };

    $run_step->(
        "${SAMTOOLS}samtools view merged.sorted.final.temp.bam | sort -k 1,1 -k 2,2n -u | tee merged.sorted.final.unique.sam | perl "
            . $shell_quote->("${scriptspath}/split_for_velvet.pl")
            . ' pair1.fq pair2.fq single.fq'
    );
    if (!(-s "pair1.fq")) {
        print STDERR "No breakpoints supported by split reads.\n";
        # Same shutdown as the "No breakpoints found" path of the main
        # script: deliver the result files and remove temp before exiting.
        chdir($outdir) or die "Cannot chdir to $outdir: $!\n";
        clean_up();
        exit(1);
    }

    $run_step->("${VELVET}velveth velvet_out 31 -fastq -shortPaired -separate pair1.fq pair2.fq -short single.fq > /dev/null");
    $run_step->("${VELVET}velvetg velvet_out -cov_cutoff 3 -ins_length 300 -exp_cov 100 -ins_length_sd 60 > /dev/null");

    # Launch the blat server at a random unused port (10000-14999). The
    # search is bounded: if gfServer is not installed its output never
    # contains "Error" and an unbounded loop would spin forever.
    my $max_port_tries = 100;
    my $port;
    for my $attempt (1 .. $max_port_tries) {
        my $candidate = 10000 + int(rand(5000));
        if ($server_status->($candidate) =~ m/Error/) {
            $port = $candidate;
            last;
        }
    }
    defined $port
        or die "No free port found for the BLAT server after $max_port_tries attempts (is ${BLAT}gfServer installed?)\n";

    print "Waiting for BLAT server to come online.\n";
    system("(${BLAT}gfServer start 127.0.0.1 $port -canStop "
            . $shell_quote->($twobit) . " >/dev/null 2>&1) &");

    # Wait for startup, polling every 15 seconds, but give up eventually so
    # a server that failed to start cannot hang the job indefinitely.
    # NOTE(review): one hour is a generous guess for loading a genome;
    # raise it if startup is legitimately slower on your storage.
    my $poll_seconds     = 15;
    my $max_wait_seconds = 3600;
    my $waited           = 0;
    while ($server_status->($port) =~ m/Error/) {
        die "BLAT server on port $port did not come online within $max_wait_seconds seconds\n"
            if $waited >= $max_wait_seconds;
        sleep $poll_seconds;
        $waited += $poll_seconds;
    }

    # run query
    $run_step->("${BLAT}gfClient 127.0.0.1 $port / velvet_out/contigs.fa velvet.psl -minScore=2 -minIdentity=90 -nohead");
    # take blat server down (also after a failed query).
    $run_step->("${BLAT}gfServer stop 127.0.0.1 $port");

    # continue with analysis.
    my $command = 'perl ' . $shell_quote->("${scriptspath}/filter_contigs.0403.pl")
        . ' velvet.psl ' . $shell_quote->("${outdir}/breakpoints.2.txt")
        . ' velvet_out/contigs.fa velvet > velvet.txt';
    print STDOUT "$command\n";
    $run_step->($command);

    rename('velvet.txt', 'contigs.txt')
        or warn "Could not rename velvet.txt to contigs.txt: $!\n";

    return;
}
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
205
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
# Deliver the result files and remove the temp directory.
#
# For each of $contigs, $bp1 and $bp2 that was given on the command line,
# copy the matching file from $outdir (contigs.txt, breakpoints.1.txt,
# breakpoints.2.txt) to that location. When $rmtmp is true, "$outdir/temp"
# is removed recursively.
#
# Reads the file-level $outdir, $cwd, $contigs, $bp1, $bp2 and $rmtmp; takes
# no arguments and returns nothing. Copy/remove failures are reported on
# STDERR but do not abort, so the remaining files are still delivered.
# Expects $outdir to be an absolute path (it is, once the main script has
# passed it through abs_path).
sub clean_up {
    require File::Spec;

    ## move files to output locations.
    my @deliveries = (
        [ 'contigs.txt',       $contigs ],
        [ 'breakpoints.1.txt', $bp1 ],
        [ 'breakpoints.2.txt', $bp2 ],
    );

    for my $delivery (@deliveries) {
        my ($name, $wanted) = @{$delivery};
        next unless defined $wanted && $wanted ne '';

        # Resolve relative destinations against the directory the script was
        # started from ($cwd). By the time this runs the current directory
        # is the output directory, so resolving against it would put the
        # file in the wrong place.
        my $destination = File::Spec->rel2abs($wanted, $cwd);
        my $source      = "$outdir/$name";

        # List form bypasses the shell: no quoting problems with spaces or
        # quote characters in either path.
        system('cp', $source, $destination) == 0
            or warn "Could not copy $source to $destination\n";
    }

    # Address temp through $outdir instead of relying on the current
    # directory, and skip the call when there is nothing to remove.
    my $tempdir = "$outdir/temp";
    if ($rmtmp && -d $tempdir) {
        system('rm', '-R', $tempdir) == 0
            or warn "Could not remove $tempdir\n";
    }

    return;
}
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
224
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
225
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
226
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
227
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
228
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
229
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
230
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
231
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
232
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
233
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
234
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
235