annotate variant_effect_predictor/Bio/EnsEMBL/Funcgen/Utils/EFGUtils.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 Copyright (c) 1999-2011 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 developers list at <ensembl-dev@ebi.ac.uk>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 Bio::EnsEMBL::Funcgen::Utils::EFGUtils
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 This module collates a variety of miscellaneous methods.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 BEGIN
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 unshift(@INC,"/path/of/local/src/modules");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 use Utils;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 &Utils::send_mail($to_address, $title, $message);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 # No API/Object based methods in here
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 ###############################################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 package Bio::EnsEMBL::Funcgen::Utils::EFGUtils;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 require Exporter;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 @ISA = qw(Exporter);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 @EXPORT_OK = qw(get_date species_name get_month_number species_chr_num
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 open_file median mean run_system_cmd backup_file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 is_gzipped is_sam is_bed get_file_format strip_param_args
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 generate_slices_from_names strip_param_flags
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 get_current_regulatory_input_names add_external_db);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 use Bio::EnsEMBL::Utils::Exception qw( throw );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 use File::Path qw (mkpath);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 use File::Basename qw (dirname);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 use Time::Local;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 use FileHandle;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 use Carp;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Returns a formatted time string for the current time, or for the
# last-modified time of $file when a file path is supplied.
#
# Arg [1]    : String (optional) - one of:
#                'date'     => year-day-month (default, see NOTE below)
#                'time'     => HH:MM:SS, zero padded
#                'timedate' => scalar localtime() style string
# Arg [2]    : String (optional) - path to a regular file whose mtime is used
# Returntype : String
# Exceptions : Throws if $file is supplied but is not a regular file.
#              Croaks if the format is not recognised.
sub get_date {
    my ($format, $file) = @_;

    # An omitted format means 'date'. Previously this default only applied
    # when $file was also omitted; with a file it fell through to the croak.
    $format = 'date' if ! defined $format;

    my $epoch;

    if (defined $file) {
        throw("File does not exist or is not a regular file:\t$file") if ! -f $file;
        $epoch = (stat($file))[9];    # element 9 of stat() is the mtime
    }
    else {
        $epoch = time();
    }

    my ($sec, $min, $hour, $mday, $mon, $year) = localtime($epoch);
    my $time;

    if ($format eq 'date') {
        # NOTE(review): this yields year-DAY-MONTH without zero padding.
        # It looks unintentional, but it is kept as-is because existing
        # callers may depend on the exact string - confirm before changing.
        $time = ($year + 1900) . '-' . $mday . '-' . ($mon + 1);
    }
    elsif ($format eq 'time') {
        $time = sprintf('%02d:%02d:%02d', $hour, $min, $sec);
    }
    elsif ($format eq 'timedate') {
        # Use the same epoch as the other formats so that $file is honoured.
        $time = scalar localtime($epoch);
    }
    else {    # add mysql formats here, datetime etc...
        croak("get_date does not handle format:\t$format");
    }

    return $time;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 #migrate this data to defs file!!??
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 #must contain all E! species and any other species which are used in local DB extractions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 #NEED TO ADD FLY!!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Translates between Latin species names and common names (case insensitive).
#
# Arg [1]    : String - Latin name (e.g. 'homo_sapiens') or common name
#              (e.g. 'human')
# Returntype : String - the common name (lower case) when given a Latin name,
#              or the Latin name (upper case) when given a common name.
#              undef if the species is not known.
sub species_name {
    my ($species) = @_;

    my %species_names = (
        'HOMO_SAPIENS'             => 'human',
        'MUS_MUSCULUS'             => 'mouse',
        'RATTUS_NORVEGICUS'        => 'rat',
        'CANIS_FAMILIARIS'         => 'dog',
        'PAN_TROGLODYTES'          => 'chimp',
        # Historical misspelling, still accepted as input for backwards
        # compatibility.
        'PAN_TROGOLODYTES'         => 'chimp',
        'GALLUS_GALLUS'            => 'chicken',
        'SACCHAROMYCES_CEREVISIAE' => 'yeast',
        'HUMAN'                    => 'HOMO_SAPIENS',
        'MOUSE'                    => 'MUS_MUSCULUS',
        'RAT'                      => 'RATTUS_NORVEGICUS',
        'DOG'                      => 'CANIS_FAMILIARIS',
        'CHIMP'                    => 'PAN_TROGLODYTES',
        'CHICKEN'                  => 'GALLUS_GALLUS',
        'YEAST'                    => 'SACCHAROMYCES_CEREVISIAE',
    );

    # An undefined species simply misses the lookup rather than warning.
    my $key = defined $species ? uc($species) : '';

    return $species_names{$key};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Converts a three letter month abbreviation (any case) to its two digit,
# zero padded month number, e.g. 'Jan' => '01'.
# Returns undef for anything that is not a recognised abbreviation.
sub get_month_number {
    my ($mon) = @_;

    my @abbreviations = qw(jan feb mar apr may jun jul aug sep oct nov dec);

    # Pair each abbreviation with its 1-based position, zero padded.
    my %number_for;
    @number_for{@abbreviations} = map { sprintf('%02d', $_) } (1 .. 12);

    return $number_for{ lc($mon) };
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Maps a non-numeric chromosome name (x, y, mt; any case) to the numeric
# value used for the given species. Any other value is returned unchanged.
#
# Arg [1] : String - species Latin name, e.g. 'homo_sapiens' or 'Homo sapiens'
# Arg [2] : String - chromosome name
# Dies if the species has no entry in the chromosome table.
sub species_chr_num {
    my ($species, $val) = @_;

    my %chr_num_for = (
        homo_sapiens      => { x => 23, y => 24, mt => 25 },
        mus_musculus      => { x => 20, y => 21, mt => 22 },
        rattus_norvegicus => { x => 21, y => 22, mt => 23 },
    );

    # Normalise to the table's key style: lower case, first space replaced
    # by an underscore.
    my $species_key = lc($species);
    $species_key =~ s/ /_/;

    if (! exists $chr_num_for{$species_key}) {
        die("species not defined in chromosome hash");
    }

    my $chr_nums = $chr_num_for{$species_key};
    my $chr_key  = lc($val);

    return $chr_nums->{$chr_key} if exists $chr_nums->{$chr_key};
    return $val;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170 #Sort should always be done in the caller if required
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Returns the median of a list of numbers.
#
# Arg [1]    : Arrayref of numbers
# Arg [2]    : Boolean (optional) - sort the values numerically first. The
#              sort is done on a copy, so the caller's array is never
#              reordered. Without this flag the input is assumed to be
#              sorted already.
# Returntype : Number, or undef if there are no scores
sub median {
    my ($scores, $sort) = @_;

    # 'return undef' (not a bare return) is kept deliberately so callers in
    # list context still receive exactly one value, as before.
    return undef if ! defined $scores || ! @$scores;

    my $count = scalar(@$scores);

    # deal with one score fastest
    return $scores->[0] if $count == 1;

    my $values = $sort ? [ sort { $a <=> $b } @$scores ] : $scores;

    # Sizes start at 1 but indices start at 0, so int($count / 2) is the
    # middle element for odd counts and the upper of the two middle
    # elements for even counts.
    my $mid = int($count / 2);

    return $values->[$mid] if $count % 2;

    return ($values->[$mid - 1] + $values->[$mid]) / 2;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Returns the arithmetic mean of a list of numbers.
#
# Arg [1]    : Arrayref of numbers
# Returntype : Number, or undef if there are no scores (previously this
#              died with a division by zero)
sub mean {
    my $scores = shift;

    return undef if ! defined $scores || ! @$scores;

    my $total = 0;
    $total += $_ for @$scores;

    return $total / scalar(@$scores);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219 #Should really extend this to detect previous file?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220 #Or do in caller?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Opens a file (creating its directory first if necessary) and returns a
# FileHandle.
#
# Arg [1]    : String - file path
# Arg [2]    : String (optional) - open operator, default '<'. If it contains
#              a '%' it is treated as a sprintf template for a pipe, with the
#              file path substituted in, e.g. 'gzip -dc %s |'.
# Arg [3]    : Octal number or string (optional) - permissions to chmod the
#              file to, e.g. 0775 or '0775'. Also used (with search bits
#              added) for any directory that has to be created.
# Returntype : FileHandle
# Exceptions : Croaks if the permissions are not valid or the open fails.
#
# NOTE: the directory is created even when opening for reading; this is
# unchanged from the previous behaviour.
sub open_file {
    my ($file, $operator, $file_permissions) = @_;

    # Validate the permissions before touching the file system, so a bad
    # value cannot leave a freshly created or truncated file behind.
    my $file_mode;

    if (defined $file_permissions) {

        if ($file_permissions =~ /[^0-9]/) {
            croak("Failed to change $file permissions using:\t$file_permissions");
        }

        if ($file_permissions =~ /\A0[0-9]/) {
            # Only a string keeps its leading zero (an octal literal such as
            # 0775 has already been converted to a number by perl), so this
            # is an octal string and can be checked and converted safely.
            if ($file_permissions =~ /[89]/) {
                croak("Failed to change $file permissions using:\t$file_permissions");
            }

            $file_mode = oct($file_permissions);
        }
        else {
            # Already numeric (e.g. from the literal 0775). A decimal value
            # such as 775 cannot be told apart from a real mode here.
            $file_mode = ($file_permissions || 0) + 0;
        }
    }

    # A directory is unusable without search (x) permission, so grant it
    # wherever read permission is granted.
    my $dir_mode = $file_mode || 0755;
    $dir_mode |= ($dir_mode & 0444) >> 2;

    $operator ||= '<';

    my $open_expr;

    if ($operator !~ /%/) {
        $open_expr = "$operator $file";
    }
    else {
        # We have some piping to do
        $open_expr = sprintf($operator, $file);
    }

    # Get dir here and create if not exists
    my $dir = dirname($file);
    mkpath($dir, {verbose => 1, mode => $dir_mode}) if ! -d $dir;

    # This does not catch incorrectly defined named pipes
    my $fh = FileHandle->new($open_expr);

    if (! defined $fh) {
        croak("Failed to open $open_expr");
    }

    # Have to chmod here as umask will over-ride permissions passed to FileHandle
    if (defined $file_mode) {
        chmod($file_mode, $file)
            or carp("Failed to change $file permissions using:\t$file_permissions");
    }

    return $fh;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
################################################################################

=head2 run_system_cmd

  Arg [1]     : String - command line to execute
  Arg [2]     : Boolean (optional) - if true, only warn on failure instead of
                throwing
  Description : Method to control the execution of the standard system() command
  Returntype  : Int - 0 on success, otherwise:
                  the exit value of the command,
                  128 + signal number if the command was killed by a signal,
                  -1 if the command could not be executed at all
  Example     : my $exit_code = run_system_cmd("gzip $file", 1);
  Exceptions  : Throws if the command fails and Arg [2] is not set

=cut

################################################################################

sub run_system_cmd {
    my ($command, $no_exit) = @_;

    # execute the passed system command
    my $status = system($command);
    my $exit_code;

    if ($status == -1) {
        warn "Failed to execute: $!\n";

        # Shifting -1 would give a meaningless large number.
        $exit_code = -1;
    }
    elsif ($status & 127) {
        my $signal = $status & 127;

        warn sprintf("Child died with signal %d, %s coredump\nError:\t%s",
                     $signal, ($status & 128) ? 'with' : 'without', $!);

        # The high byte is 0 when a signal killed the child, so derive a
        # non-zero code (shell convention) rather than reporting success.
        $exit_code = 128 + $signal;
    }
    else {
        $exit_code = $status >> 8;    # get the true exit code

        if ($exit_code != 0) {
            warn sprintf("Child exited with value %d\nError:\t%s\n", $exit_code, $!);
        }
    }

    # We're not catching the command's own error message here!

    if ($exit_code != 0) {

        if (! $no_exit) {
            throw("System command failed:\t$command\n");
        }
        else {
            warn("System command returned non-zero exit code:\t$command\n");
        }
    }

    # The raw exit code is returned (not a perl style boolean) because some
    # programs, e.g. tab2mage, use non-zero exit codes on success.
    return $exit_code;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Backs up a file by renaming it to <file_path>.HH:MM:SS (current local time).
# Does nothing if the path is not an existing regular file.
#
# Arg [1]    : String - path of the file to back up
# Returntype : Boolean - 1 on success or if there was nothing to back up,
#              0 if the rename failed
# Exceptions : Throws if no file path is given
sub backup_file {
    my $file_path = shift;

    throw("Must define a file path to backup") if ! $file_path;

    if (-f $file_path) {
        # Same suffix as `date '+%T'`, built in perl so the path never passes
        # through a shell (paths with spaces or metacharacters are safe).
        my ($sec, $min, $hour) = localtime();
        my $backup_path = sprintf('%s.%02d:%02d:%02d', $file_path, $hour, $min, $sec);

        rename($file_path, $backup_path) or return 0;
    }

    return 1;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
360
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
361
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Guesses the format of a file by trying each format test in turn.
# Returns the first true value from is_bed then is_sam, otherwise the false
# value returned by is_sam.
sub get_file_format {
    my $file = shift;

    # Add more tests here
    my $format = is_bed($file) || is_sam($file);

    return $format;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Tests whether a file is gzip compressed, using the 'file' utility.
#
# Arg [1]    : String - file path (symlinks are followed)
# Arg [2]    : Boolean - fail_if_compressed. NOTE(review): accepted for
#              interface compatibility but not used by this code; a file
#              compressed with anything other than gzip always throws.
# Returntype : Boolean - 1 if gzipped, 0 if not compressed
# Exceptions : Throws if the file does not exist, if 'file' cannot be run,
#              or if the file is compressed with something other than gzip
sub is_gzipped {
    my ($file, $fail_if_compressed) = @_;

    throw("File does not exist:\t$file") if ! -e $file;

    # List form pipe open: no shell is involved, so paths containing spaces
    # or metacharacters are safe. -b omits the file name from the output so
    # the tests below cannot match on the name itself.
    open(my $file_cmd, '-|', 'file', '-b', '-L', '--', $file)
        or throw("Can't execute command 'file' on '$file'");
    my $file_info = <$file_cmd>;
    close($file_cmd);

    throw("Can't execute command 'file' on '$file'") if ! defined $file_info;

    my $gzip = 0;

    if ($file_info =~ /compressed data/) {

        if ($file_info =~ /gzip/) {
            $gzip = 1;
        }
        else {
            throw("File is compressed but not with gzip, please unzip or gzip:\t$file: $file_info");
        }
    }

    return $gzip;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
401
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
402 #Change these to also return the gz status
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
403
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Tests whether a file looks like a SAM file. Only the file suffix is
# checked: '.sam', or '.sam.gz' when is_gzipped reports the file as gzipped.
#
# Arg [1]    : String - file path
# Returntype : String 'sam' if the suffix matches, otherwise 0
sub is_sam {
    my $file = shift;

    warn "Only checking file suffix for is_sam";
    # Could check for header here altho this is not mandatory!
    # Can we use web format guessing code?

    my $gz = is_gzipped($file, 1) ? '.gz' : '';

    # The dots are literal and the match is anchored to the end of the name,
    # so names such as 'jetsam.bed' no longer count as SAM.
    return ($file =~ /\.sam\Q$gz\E\z/) ? 'sam' : 0;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
415
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
416 #need is bam here too!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
417
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Checks whether a file looks like 6 column BED by validating its first line.
#
# Arg [1]  : String - path of the file to test; read through zcat when
#            is_gzipped returns true for it
# Returns  : 'bed' if the first line has at least 6 tab separated columns,
#            numeric 2nd and 3rd columns and a strand of '+', '-' or '.' in
#            the 6th column; otherwise 0 (after emitting a warning)
# Throws   : If the file (or the zcat pipe) cannot be opened
# Note     : Only the first line is examined. The 1st column (seq_region
#            name) is deliberately not validated, see comment below.
sub is_bed {
  my $file = shift;

  #Use open_file here!
  my $fh;

  if(&is_gzipped($file, 1)){
    # List form open runs zcat directly, so the file name is never
    # interpreted by a shell
    open($fh, '-|', 'zcat', $file) or throw("Can't open file via zcat:\t$file");
  }
  else{
    open($fh, '<', $file) or throw("Can't open file:\t$file");
  }

  my $first_line = <$fh>;
  close $fh;

  my @line;
  #$verbose =1;

  if(defined $first_line){
    chomp $first_line;
    @line = split("\t", $first_line);
  }

  if (scalar @line < 6) {
    warn "Infile '$file' does not have 6 or more columns. We expect bed format:\t".
      "CHROM START END NAME SCORE STRAND.\n";
    return 0;
    #} elsif ($line[0] !~ m/^((chr)?[MTXYNT_\d]+)$/) {
    #  warn ("1st column must contain name of seq_region (e.g. chr1 or 1) in '$file'");
    #  return 0;
    #Commented this out for now due to HSCHR_RANDOM seqs
    #How does the webcode handle this?
  }
  elsif ($line[1] !~ m/^\d+$/ || $line[2] !~ m/^\d+$/) {
    # Reject the line if either coordinate is not a plain integer
    warn "2nd and 3rd column must contain start and end respectively in '$file'\n";
    return 0;
  }
  elsif ($line[5] !~ m/^[-+.]$/) {
    # '-' is placed first in the class so it is a literal and not a range
    # operator; a '+-.' range would also accept ','
    warn "6th column must define strand (either +, - or .) in '$file'\n";
    return 0;
  }

  return 'bed';
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
463
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
464
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
465 #These subs are useful for implementing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
466 #a farm mode in a run script, where a script can
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
467 #submit itself to the farm as slice based jobs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
468
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
469 #strip cmd line params and associated arguments from a list
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
470 #should not be used to remove flag options i.e. no following args
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
471 #as this may cause removal of any following @ARGV;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
472 #Can this be used on flattened args hash?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
473
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Blanks out the named command line parameters and the arguments following
# them in an argument list.
#
# Arg [1]  : Arrayref - argument list (e.g. a copy of @ARGV). It is modified
#            in place: stripped elements are set to '' rather than removed.
# Arg [2+] : List of parameter names to strip, without leading dashes
# Returns  : The same arrayref that was passed in
# Note     : Any element starting with one or more '-' is treated as a new
#            parameter. Everything after a stripped parameter is blanked
#            until the next such element, so this should not be used for
#            flags which take no arguments.
sub strip_param_args{
  my ($args, @strip_params) = @_;

  # Exact string lookup. Interpolating the command line token into a regex
  # dies on tokens like '--foo[' and lets '-a.c' match a parameter 'abc'.
  my %strip = map { $_ => 1 } @strip_params;
  my $seen_opt = 0;

  foreach my $i (0..$#{$args}){

    if($args->[$i] =~ /^-+/){
      (my $param_name = $args->[$i]) =~ s/^-+//;

      #Reset seen opt if we have seen a new one
      $seen_opt = exists $strip{$param_name} ? 1 : 0;
    }

    $args->[$i] = '' if $seen_opt;#Remove option and args following option
  }

  return $args;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
501
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
502
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Removes the named flags (options which take no argument) from a copy of an
# argument list.
#
# Arg [1]  : Arrayref - argument list; it is not modified
# Arg [2+] : List of flag names to strip, without leading dashes
# Returns  : Arrayref - new list without any element that consists of one or
#            more '-' followed by exactly one of the flag names
sub strip_param_flags{
  my ($args, @strip_params) = @_;

  my @args = @$args;

  foreach my $flag (@strip_params){
    # Anchor at both ends and quote the flag: an unanchored pattern also
    # removes values which merely end in '-<flag>', e.g. 'path/my-farm'
    @args = grep { !/\A-+\Q$flag\E\z/ } @args;
  }

  return \@args;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
514
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
515 #Generates slices from names or optionally all default top level nonref
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
516
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Builds a list of slices either from explicit names or from all slices of
# the highest available level.
#
# Arg [1]  : Slice adaptor - object providing fetch_by_region, fetch_by_name
#            and fetch_all as called below
# Arg [2]  : Arrayref (optional) - seq_region or slice names to fetch
# Arg [3]  : Arrayref (optional) - seq_region names to leave out
# Arg [4]  : Boolean - fetch all 'toplevel' slices when no names are given
# Arg [5]  : passed through to fetch_all as the non-reference argument
# Arg [6]  : passed through to fetch_all as the include-duplicates argument
# Arg [7]  : String (optional) - assembly version; when set, fetch_all is
#            called with level 'chromosome' instead of 'toplevel'
# Returns  : Arrayref of slices
# Throws   : If a name cannot be fetched by region or by name, if neither
#            names nor the highest level flag are given, or if no slices
#            remain after skipping
sub generate_slices_from_names{
  my ($slice_adaptor, $slice_names, $skip_slices, $highestlevel, $non_ref, $inc_dups, $assembly) = @_;

  #Test if $assembly is old?

  # Tolerate omitted lists; dereferencing undef would die under strict refs
  $slice_names ||= [];
  $skip_slices ||= [];

  # Exact name lookup. Using the seq_region name as a regex treats '.' in
  # names such as 'GL000191.1' as a wildcard.
  my %skip = map { $_ => 1 } @$skip_slices;
  my @slices;

  if(@$slice_names){

    foreach my $name (@$slice_names){
      my $slice = $slice_adaptor->fetch_by_region(undef, $name, undef, undef, undef, $assembly);

      #WHy is this failing for hap regions?

      if(! $slice){

        #Need to eval this as it will break with incorrect formating
        eval { $slice = $slice_adaptor->fetch_by_name($name) };

        if(! $slice){
          throw("Could not fetch slice by region or name:\t".$name);
        }
      }

      next if exists $skip{$slice->seq_region_name};
      push @slices, $slice;
    }
  }
  elsif($highestlevel){

    my $level = 'toplevel';

    if($assembly){
      $level = 'chromosome';
      warn "Cannot get toplevel for old assembly version $assembly, defaulting to 'chromosome' level";
      #Would ignore old assembly and just fetch current assembly otherwise as there is no toplevel for old assemblies
      #No need for projection on non-ref unassembled seqs as these will/should be identical
      #Only need need to project assembled seq i.e. haps(lrgs?).
      #Only rollback toplevel data when cleaning after projection, otherwise we may lose some data.
      #Change default delete to use all toplevel ref seqs (and non-ref with cs version e.g. haps but not lrgs)
    }

    my @tmp_slices = @{$slice_adaptor->fetch_all($level, $assembly, $non_ref, $inc_dups)};

    if(%skip){
      @slices = grep { ! exists $skip{$_->seq_region_name} } @tmp_slices;
    }
    else{
      @slices = @tmp_slices;
    }
  }
  else{
    throw('You must either pass an arrayref of slice names or specify the toplevel flag');
  }


  if(! @slices){
    throw("You have specified slice_names and skip_slices paramters which have generated no slices.\nslice_names:\t".join(' ',@$slice_names)."\nskip_slices:\t".join(' ', @$skip_slices));
  }

  return \@slices;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
588
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
589
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
590 # Tracking DB methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
591 # Move to DBAdaptor? Can we add this as a separate package in the same module?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
592
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Translates the current (non-)focus dataset names held in the tracking DB
# into feature_set names of the given funcgen DB.
#
# Arg [1]  : Tracking DB object - must provide db_handle
# Arg [2]  : Funcgen DB object - must provide species and dbc->db_handle
# Arg [3]  : Boolean - true selects focus datasets, false non-focus ones
# Returns  : List of feature_set names, one per dataset name
# Throws   : If any dataset name cannot be resolved to exactly one
#            feature_set (see the fallback in the loop below)
# Note     : All values are passed to the DB as bind values, so names
#            containing quote characters do not break the statements.
sub get_current_regulatory_input_names{
  my ($tdb, $efg_db, $focus) = @_;

  #Validate is production?
  my $focus_clause;

  if($focus){
    $focus        = 'Focus';
    $focus_clause = 'is_focus=true';
  }
  else{
    $focus        = 'Non-focus';
    #0 rather than false so we don't get NULLs
    $focus_clause = 'is_focus=0';
  }

  my $sql = "SELECT efgdb_set_name from dataset where $focus_clause and is_current=true and species=?";

  #Currently efgdb_set_name can either be data_set or feature_set name!
  #Need to standardise this

  my @prd_names = @{$tdb->db_handle->selectcol_arrayref($sql, undef, $efg_db->species)};
  my @names;
  my @failed_sets;

  foreach my $prd_name (@prd_names){

    $sql = 'SELECT name from feature_set where name like ?';
    my @tmp_names = @{$efg_db->dbc->db_handle->selectcol_arrayref($sql, undef, $prd_name.'%')};

    #This is causing problems with multiple feature sets with differing analyses

    #Do this via InputSets(using query extension?) instead of using like?

    #This is very hacky right now to get it to work
    #Need to standardise and review tracking db data.

    if(scalar(@tmp_names) > 1){

      # Ambiguous prefix match: fall back to the '<name>_ccat_histone' set
      $sql = 'SELECT name from feature_set where name = ?';
      @tmp_names = @{$efg_db->dbc->db_handle->selectcol_arrayref($sql, undef, $prd_name.'_ccat_histone')};

      if(scalar(@tmp_names) == 1){
        push @names, $tmp_names[0];
      }else{
        push @failed_sets, $prd_name;
      }
    }
    elsif(scalar(@tmp_names) == 0){
      push @failed_sets, $prd_name;
    }
    else{
      push @names, $tmp_names[0];
    }

  }

  if(@failed_sets){
    throw("Failed to find unique $focus FeatureSets for production dataset names:\n\t".
          join("\n\t", @failed_sets)."\n");
  }

  return @names;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
658
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
659 #Handy function to add an external_db entry when needed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#Handy function to add an external_db entry when needed
#
# Arg [1]  : Funcgen DB object - must provide dbc->db_handle and dbc->do
# Arg [2]  : String - external_db.db_name
# Arg [3]  : String - external_db.db_release
# Arg [4]  : String - external_db.db_display_name
# Note     : Warns and inserts nothing if an entry with the same db_name
#            and db_release already exists. The remaining columns are
#            inserted with the fixed values shown in the statement below.
#            Undefined values are stored as empty strings.
sub add_external_db{
  my ($efg_db, $db_name, $db_release, $db_display_name) = @_;

  my $dbh = $efg_db->dbc->db_handle;

  # Let the driver quote the values so that names containing quote
  # characters cannot break (or alter) the statements
  my ($q_name, $q_release, $q_display_name) =
    map { $dbh->quote(defined $_ ? $_ : '') } ($db_name, $db_release, $db_display_name);

  my $sql = "select external_db_id from external_db where db_name=$q_name and db_release=$q_release";
  my ($db_id) = $dbh->selectrow_array($sql);

  if($db_id){
    warn "External DB $db_name $db_release already exists in db with db_id $db_id\n";
  } else {
    #TODO check if it there was a failure
    $efg_db->dbc->do("insert into external_db (db_name, db_release, status, dbprimary_acc_linkable, priority, db_display_name, type) values($q_name, $q_release, 'KNOWNXREF', '1', '5', $q_display_name, 'MISC')");
  }

}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
672
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
673 1;