annotate run_cf0611.pl @ 26:9f96e2fa94ed draft

Create local tmp folder under /tmp instead of in galaxy_new_file_path location
author geert-vandeweyer
date Mon, 11 Aug 2014 05:22:08 -0400
parents 9a26daf268da
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
1 #!/usr/bin/perl
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
2
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
3 #Copyright 2012-2013 Haley Abel
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
4 #
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
5 #This file is part of ClusterFAST.
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
6 #
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
7 #ClusterFAST is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
8 #
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
9 #ClusterFAST is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
10 #
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
11 #You should have received a copy of the GNU General Public License along with ClusterFAST. If not, see <http://www.gnu.org/licenses/>.
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
12
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
13
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
14 use warnings;
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
15 use strict;
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
16
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
17 use Getopt::Long;
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
18 use Pod::Usage;
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
19 use File::Path qw(make_path);
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
20 use Cwd ;
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
21 use Cwd 'abs_path';
17
b9711e16f148 Uploaded
geert-vandeweyer
parents: 14
diff changeset
22 use File::Basename;
1
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
23
22
9a26daf268da Uploaded
geert-vandeweyer
parents: 20
diff changeset
24 my ($inbam, $outdir, $targets, $minct1, $minct2, $dist, $novoindex, $rmtmp, $FASTA,$twobit,$refpaths,$bp1,$bp2,$contigs,$insert,$sdinsert);
1
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
25
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
26 $minct1=2;
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
27 $minct2=1;
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
28 $dist=50000;
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
29 $rmtmp=1;
22
9a26daf268da Uploaded
geert-vandeweyer
parents: 20
diff changeset
30 $insert=230;
9a26daf268da Uploaded
geert-vandeweyer
parents: 20
diff changeset
31 $sdinsert=140;
1
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
32 my $known_partners=0;
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
33 my $partnerfile='';
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
34
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
35 my $cwd = getcwd();
14
aaf5c557af13 Uploaded
geert-vandeweyer
parents: 12
diff changeset
36 my $scriptspath = '';
aaf5c557af13 Uploaded
geert-vandeweyer
parents: 12
diff changeset
37 my $cffile = '';
12
e4000df38772 Uploaded
geert-vandeweyer
parents: 6
diff changeset
38 # Galaxy installer puts scripts and jar into a folder, which is available under the environment variable CF_PATH
17
b9711e16f148 Uploaded
geert-vandeweyer
parents: 14
diff changeset
39 # this is future work (once github of clusterfast gets updated. For now, it's a placeholder.
12
e4000df38772 Uploaded
geert-vandeweyer
parents: 6
diff changeset
40 if (defined($ENV{'CF_PATH'}) && -d $ENV{'CF_PATH'}) {
17
b9711e16f148 Uploaded
geert-vandeweyer
parents: 14
diff changeset
41 $scriptspath = dirname(abs_path($0))."/cf_scripts";
b9711e16f148 Uploaded
geert-vandeweyer
parents: 14
diff changeset
42 $cffile = dirname(abs_path($0))."/cf_v1.1.jar";
12
e4000df38772 Uploaded
geert-vandeweyer
parents: 6
diff changeset
43 }
e4000df38772 Uploaded
geert-vandeweyer
parents: 6
diff changeset
44 else {
e4000df38772 Uploaded
geert-vandeweyer
parents: 6
diff changeset
45 $scriptspath="$cwd/cf_scripts";
e4000df38772 Uploaded
geert-vandeweyer
parents: 6
diff changeset
46 $cffile="$cwd/cf_v1.1.jar";
e4000df38772 Uploaded
geert-vandeweyer
parents: 6
diff changeset
47 }
1
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
48 my $help='';
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
49
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
50
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
51 #YOU MUST ADD LOCAL FILE PATHS HERE FOR STANDALONE USE (including trailing slashes!). Galaxy adds the binaries to the path, so these can be left empty.
6
233be956ae78 Uploaded
geert-vandeweyer
parents: 1
diff changeset
52 my $SAMTOOLS='';##INSERT PATH TO SAMTOOLS; http://sourceforge.net/projects/samtools/files/samtools/;
233be956ae78 Uploaded
geert-vandeweyer
parents: 1
diff changeset
53 my $NOVOALIGN='';##INSERT PATH TO NOVOALIGN; http://www.novocraft.com/main/downloadpage.php;
233be956ae78 Uploaded
geert-vandeweyer
parents: 1
diff changeset
54 my $VELVET='';##INSERT PATH TO VELVET;http://www.molecularevolution.org/software/genomics/velvet;
233be956ae78 Uploaded
geert-vandeweyer
parents: 1
diff changeset
55 my $BLAT='';##INSET PATH TO BLAT gfServer/gfClient tools;
1
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
56
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
57
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
58
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
59
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
60 GetOptions(
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
61 "b|inbam=s"=> \$inbam,
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
62 "o|outdir=s"=> \$outdir,
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
63 "t|targets=s"=> \$targets,
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
64 "f|fasta=s"=> \$FASTA,
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
65 "m1|minct1=i"=> \$minct1,
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
66 "m2|minct2=i"=> \$minct2,
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
67 "d|distance=i"=>\$dist,
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
68 "i|novoindex=s"=> \$novoindex,
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
69 "r|rmtmp=i"=> \$rmtmp,
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
70 "h|help"=>\$help,
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
71 "2|twobit=s"=>\$twobit,
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
72 "p|refpaths=s"=>\$refpaths,
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
73 "c|contigs=s"=>\$contigs,
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
74 "y|bp1=s"=>\$bp1,
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
75 "z|bp2=s"=>\$bp2,
22
9a26daf268da Uploaded
geert-vandeweyer
parents: 20
diff changeset
76 "i|insert=i"=>\$insert,
9a26daf268da Uploaded
geert-vandeweyer
parents: 20
diff changeset
77 "s|sdinsert=i"=>\$sdinsert,
1
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
78 ) or die "Usage: run_cf_0611.pl -b inbam -o outdir -f fasta -t targets -d distance -i novoindex -r rmtmp -m1 min_pairs -m2 min_splits -t genome.2bit\n";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
79
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
80 if($help) {
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
81 print "Usage: run_cf_0611.pl -b inbam -o outdir -f fasta -t targets -d distance -i novoindex -r rmtmp -m1 min_pairs -m2 min_splits -t genome.2bit\n";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
82 print "targets=BED style interval file containing the regions to search for SV\n";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
83 print "fasta=reference fasta file\nnovoindex=reference index for novoalign\n";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
84 print "distance=max distance from target to search (default=50000)\n";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
85 print "min_pairs=minimum number of read pairs supporting breakpoint (default=2)\nmin_splits=minimum number of split reads supporting breakpoint (default=1)\n";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
86 print "rmtmp=remove temp directory (default=1, true)\n";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
87 print "genome.2bit = 2bit index fasta file of reference genome, used by the blat server (which is launched on the localhost)\n";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
88 print "refpaths = semicolon seperated list of reference genomes paths (used by galaxy). The order is mandatory to be novoalign;2bit;fasta\n";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
89 print "contigs = (Optional) output file for the contigs (default: outdir/contigs.txt)\n";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
90 print "bp1 = (Optional) output file for the breakpoints.1.txt file (default: outdir/breakpoints.1.txt)\n";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
91 print "bp2 = (Optional) output file for the breakpoints.2.txt file (default: outdir/breakpoints.2.txt)\n";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
92 exit(1);
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
93 }
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
94
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
95 # if refpaths is defined => split into seperate variables.
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
96 if (defined($refpaths) && $refpaths ne '') {
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
97 ($novoindex, $twobit, $FASTA) = split(/;/,$refpaths);
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
98 }
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
99
26
9f96e2fa94ed Create local tmp folder under /tmp instead of in galaxy_new_file_path location
geert-vandeweyer
parents: 22
diff changeset
100 if(! ( defined $inbam && defined $targets && defined $FASTA && defined $novoindex && defined $twobit )) {
1
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
101 print STDERR "Usage: run_cf_0611.pl -b inbam -o outdir -f fasta -t targets -d distance -i novoindex -r rmtmp -m1 min_pairs -m2 min_splits -t genome.2bit\n";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
102 exit(1);
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
103 }
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
104 elsif (! (-e $inbam && -e $targets && -e $FASTA && -e $novoindex && -e $twobit)) {
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
105 print STDERR "Critical file missing.\n";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
106 exit(1);
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
107 }
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
108
26
9f96e2fa94ed Create local tmp folder under /tmp instead of in galaxy_new_file_path location
geert-vandeweyer
parents: 22
diff changeset
109 # output dir specified?
9f96e2fa94ed Create local tmp folder under /tmp instead of in galaxy_new_file_path location
geert-vandeweyer
parents: 22
diff changeset
110 if (!defined($outdir)) {
9f96e2fa94ed Create local tmp folder under /tmp instead of in galaxy_new_file_path location
geert-vandeweyer
parents: 22
diff changeset
111 $outdir = "/tmp/cf_tmp_".int(rand(1000));
9f96e2fa94ed Create local tmp folder under /tmp instead of in galaxy_new_file_path location
geert-vandeweyer
parents: 22
diff changeset
112 while (-d $outdir) {
9f96e2fa94ed Create local tmp folder under /tmp instead of in galaxy_new_file_path location
geert-vandeweyer
parents: 22
diff changeset
113 $outdir = "/tmp/cf_tmp_".int(rand(1000));
9f96e2fa94ed Create local tmp folder under /tmp instead of in galaxy_new_file_path location
geert-vandeweyer
parents: 22
diff changeset
114 }
9f96e2fa94ed Create local tmp folder under /tmp instead of in galaxy_new_file_path location
geert-vandeweyer
parents: 22
diff changeset
115 }
9f96e2fa94ed Create local tmp folder under /tmp instead of in galaxy_new_file_path location
geert-vandeweyer
parents: 22
diff changeset
116 # prepare output dir
1
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
117 make_empty_output();
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
118 $outdir = abs_path($outdir);
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
119
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
120 ## convert Targets.bed to targets.picard
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
121 my $cmd="${SAMTOOLS}samtools view -H $inbam > $outdir/Targets.picard";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
122 print STDOUT "$cmd\n";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
123 system($cmd);
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
124 $cmd = "awk '{OFS=\"\\t\"; print \$1,\$2,\$3,\$6,\$4 }' $targets >> $outdir/Targets.picard";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
125 print STDOUT "$cmd\n";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
126 system($cmd);
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
127 $targets = "$outdir/Targets.picard";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
128
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
129 ## check all reads are paired. Combination of paired and single end libraries crashes the tool.
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
130 my @fs = `${SAMTOOLS}samtools flagstat $inbam`;
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
131 my $total = 0;
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
132 my $paired = 0;
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
133 foreach(@fs) {
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
134 chomp;
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
135 if ($_ =~ m/^(\d+).*in total/) {
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
136 $total = $1;
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
137 }
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
138 elsif ($_ =~ m/^(\d+).*paired in sequencing/) {
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
139 $paired = $1;
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
140 }
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
141 }
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
142 if ($total != $paired) {
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
143 print "Removing single-read data from the input bam file\n";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
144 system("${SAMTOOLS}samtools view -f 1 -h -b -o $outdir/Paired.only.bam $inbam");
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
145 $inbam = "$outdir/Paired.only.bam";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
146 }
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
147
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
148 ## process.
19
f51dd12f2263 Uploaded
geert-vandeweyer
parents: 18
diff changeset
149 $cmd="java -Xmx8g -Xms6g -jar $cffile $inbam $targets $minct1 $dist $outdir 1 2>&1";
1
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
150 print STDOUT "$cmd\n";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
151 system($cmd);
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
152
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
153 chdir("${outdir}/temp");
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
154
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
155 if(!(-s "toRemap1.fq")) {
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
156 print STDOUT "No breakpoints found.\n";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
157 chdir("$outdir");
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
158 clean_up();
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
159 exit(1);
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
160 }
18
6b9abfbf796a Uploaded
geert-vandeweyer
parents: 17
diff changeset
161 my $r = int(rand(100));
6b9abfbf796a Uploaded
geert-vandeweyer
parents: 17
diff changeset
162 while (-e "/tmp/na.stderr.$r") {
6b9abfbf796a Uploaded
geert-vandeweyer
parents: 17
diff changeset
163 $r = int(rand(100));
6b9abfbf796a Uploaded
geert-vandeweyer
parents: 17
diff changeset
164 }
1
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
165
22
9a26daf268da Uploaded
geert-vandeweyer
parents: 20
diff changeset
166 $cmd="${NOVOALIGN}novoalign -o SAM -i $insert $sdinsert -r all -e 999 -c2 -d $novoindex -F STDFQ -f toRemap1.fq toRemap2.fq > novoout.2.sam 2>/tmp/na.stderr.$r";
1
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
167 print STDOUT "$cmd\n";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
168 system($cmd);
18
6b9abfbf796a Uploaded
geert-vandeweyer
parents: 17
diff changeset
169 my $na = `cat /tmp/na.stderr.$r`;
6b9abfbf796a Uploaded
geert-vandeweyer
parents: 17
diff changeset
170 print STDOUT $na;
6b9abfbf796a Uploaded
geert-vandeweyer
parents: 17
diff changeset
171 unlink("/tmp/na.stderr.$r");
1
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
172
19
f51dd12f2263 Uploaded
geert-vandeweyer
parents: 18
diff changeset
173 $cmd="java -Xmx6g -Xms4g -jar $cffile novoout.2.sam $targets $minct2 $dist $outdir 2 2>/dev/null";
1
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
174 print STDOUT "$cmd\n";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
175 system($cmd);
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
176
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
177
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
178 chdir("${outdir}/temp");
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
179 print STDOUT "Assembling contigs...\n";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
180 &assemble();
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
181 print STDOUT "Done\n";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
182
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
183
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
184 system("mv contigs.txt $outdir");
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
185 chdir("$outdir");
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
186 clean_up();
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
187
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
188
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
189
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
190
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
191 #########################
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
192
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
193 sub make_empty_output {
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
194 make_path($outdir);
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
195 open(FH, ">${outdir}/breakpoints.1.txt") or die "Can't open ${outdir}/breakpoints.1.txt"; close(FH);
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
196 open(FH, ">${outdir}/breakpoints.2.txt") or die "Can't open ${outdir}/breakpoints.2.txt"; close(FH);
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
197 open(FH, ">${outdir}/contigs.txt") or die "Can't open ${outdir}/contigs.txt"; close(FH);
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
198
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
199 }
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
200
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
201 sub assemble {
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
202 system("${SAMTOOLS}samtools view merged.sorted.final.temp.bam | sort -k 1,1 -k 2,2n -u | tee merged.sorted.final.unique.sam | perl ${scriptspath}/split_for_velvet.pl pair1.fq pair2.fq single.fq");
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
203 if(!(-s "pair1.fq")) {
20
e79b771cf3a9 Uploaded
geert-vandeweyer
parents: 19
diff changeset
204 print STDOUT "No breakpoints supported by split reads.\n";
1
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
205 exit(1);
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
206 }
19
f51dd12f2263 Uploaded
geert-vandeweyer
parents: 18
diff changeset
207 system("${VELVET}velveth velvet_out 31 -fastq -shortPaired -separate pair1.fq pair2.fq -short single.fq > /dev/null 2>&1");
f51dd12f2263 Uploaded
geert-vandeweyer
parents: 18
diff changeset
208 system("${VELVET}velvetg velvet_out -cov_cutoff 3 -ins_length 300 -exp_cov 100 -ins_length_sd 60 > /dev/null 2>&1");
1
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
209 # launch blat server at random unused port
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
210 my $port = 10000 + int(rand(5000));
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
211 my $status = `${BLAT}gfServer status 127.0.0.1 $port 2>&1` ;
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
212 while ($status !~ m/Error/) {
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
213 $port = 10000 + int(rand(5000));
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
214 $status = `${BLAT}gfServer status 127.0.0.1 $port 2>&1` ;
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
215 }
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
216 print "Waiting for BLAT server to come online.\n";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
217 system("(${BLAT}gfServer start 127.0.0.1 $port -canStop $twobit >/dev/null 2>&1) &");
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
218 # wait for startup
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
219 $status = `${BLAT}gfServer status 127.0.0.1 $port 2>&1` ;
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
220 while ($status =~ m/Error/) {
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
221 sleep 15;
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
222 $status = `${BLAT}gfServer status 127.0.0.1 $port 2>&1` ;
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
223 }
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
224
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
225 # run query
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
226 system("${BLAT}gfClient 127.0.0.1 $port / velvet_out/contigs.fa velvet.psl -minScore=2 -minIdentity=90 -nohead");
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
227 # take blat server down.
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
228 system("${BLAT}gfServer stop 127.0.0.1 $port");
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
229 # continue with analysis.
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
230 my $command = "perl ${scriptspath}/filter_contigs.0403.pl velvet.psl ${outdir}/breakpoints.2.txt velvet_out/contigs.fa velvet > velvet.txt";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
231 print STDOUT "$command\n";
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
232 system($command);
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
233 system("mv velvet.txt contigs.txt");
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
234 }
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
235
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
236 sub clean_up {
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
237 ## move files to output locations.
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
238 if (defined $contigs) {
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
239 $contigs = abs_path($contigs);
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
240 system("cp '$outdir/contigs.txt' '$contigs'");
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
241 }
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
242 if (defined $bp1) {
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
243 $bp1 = abs_path($bp1);
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
244 system("cp '$outdir/breakpoints.1.txt' '$bp1'");
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
245 }
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
246 if (defined $bp2) {
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
247 $bp2 = abs_path($bp2);
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
248 system("cp '$outdir/breakpoints.2.txt' '$bp2'");
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
249 }
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
250 if ($rmtmp) {
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
251 system("rm -R temp");
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
252 }
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
253 }
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
254
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
255
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
256
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
257
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
258
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
259
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
260
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
261
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
262
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
263
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
264
4a3afa90ff7f Uploaded
geert-vandeweyer
parents:
diff changeset
265