Mercurial > repos > devteam > hgv_fundo
comparison disease_ontology_gene_fuzzy_selector.pl @ 0:6b09d2ed034f draft
Uploaded tool tarball.
author | devteam |
---|---|
date | Tue, 20 Aug 2013 10:55:38 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:6b09d2ed034f |
---|---|
#!/usr/bin/env perl

use strict;
use warnings;

##################################################################
# Select genes that are associated with the diseases listed in the
# disease ontology.
# ontology: http://do-wiki.nubic.northwestern.edu/index.php/Main_Page
# gene associations by FunDO: http://projects.bioinformatics.northwestern.edu/do_rif/
# Sept 2010, switch to doLite
# input: build outfile sourceFileLoc.loc term or partial term
#
# Arguments (positional):
#   build       genome build name, matched against column 1 of the .loc file
#   outfile     path of the tab-separated file to write
#   sourceFile  .loc file; tab-separated lines of
#               build, 'disease associated genes', path-to-data-file
#   term        one or more whitespace-separated words; a data row is
#               selected when its disease column contains ANY of the
#               words (case-insensitive substring match). The single
#               word 'disease' selects every row.
#
# Diagnostics for a missing build or a missing term are printed to STDOUT
# and the script exits with status 0; failures to open or close a file die.
##################################################################

if (@ARGV < 3) {
    print "usage: disease_ontology_gene_fuzzy_selector.pl build outfile.txt sourceFile.loc [list of terms]\n";
    exit;
}

my $build = shift @ARGV;
my $out   = shift @ARGV;
my $in    = shift @ARGV;
my $term  = shift @ARGV;

# The term is optional on the command line, so only touch it when present
# (avoids "uninitialized value" warnings before the check further down).
if (defined $term) {
    $term =~ s/^'//;    #remove quotes protecting from shell
    $term =~ s/'$//;
}

# Look up the data file registered for this build in the .loc file.
# If several lines match, the last one wins.
my $data;
open(my $loc_fh, '<', $in) or die "Couldn't open $in, $!\n";
while (my $line = <$loc_fh>) {
    chomp $line;
    next if $line =~ /^\s*#/;    # comment line
    my @f = split(/\t/, $line);
    next if @f < 3;              # blank or incomplete line: nothing to look up
    if ($f[0] eq $build and $f[1] eq 'disease associated genes') {
        $data = $f[2];
    }
}
close $loc_fh or die "Couldn't close $in, $!\n";

if (defined $data) {
    # Three-argument open takes the path literally, so trim stray
    # whitespace (e.g. a trailing carriage return) from the .loc entry.
    $data =~ s/^\s+//;
    $data =~ s/\s+$//;
}
if (!$data) {
    print "Error $build not found in $in\n";
    exit;
}

# Break the term into words. split ' ' discards leading and trailing
# whitespace, so no empty alternative (which would match every row) can
# end up in the pattern.
my @words = defined $term ? split(' ', $term) : ();
if (!@words) {
    print "No disease term entered\n";
    exit;
}

#start with just fuzzy term matches
# Each word is escaped so that punctuation in a term is matched literally
# and cannot break (or abuse) the regular expression; words are ORed.
my $pattern    = join('|', map { quotemeta } @words);
my $term_re    = qr/$pattern/i;
my $select_all = (@words == 1 and $words[0] eq 'disease');    #print all with disease

open(my $out_fh, '>', $out) or die "Couldn't open $out, $!\n";
open(my $data_fh, '<', $data) or die "Couldn't open data file $data, $!\n";
while (my $line = <$data_fh>) {
    chomp $line;
    my @f = split(/\t/, $line);    #chrom start end strand geneName geneID disease
    if ($select_all or (defined $f[6] and $f[6] =~ $term_re)) {
        print {$out_fh} join("\t", @f), "\n";
    }
}
close $data_fh or die "Couldn't close data file $data, $!\n";
close $out_fh or die "Couldn't close $out, $!\n";

exit;