#!/usr/bin/env perl

use strict;
use warnings;

##################################################################
# Select genes that are associated with the diseases listed in the
# disease ontology.
# ontology: http://do-wiki.nubic.northwestern.edu/index.php/Main_Page
# gene associations by FunDO: http://projects.bioinformatics.northwestern.edu/do_rif/
# Sept 2010, switch to doLite
# input: build outfile sourceFileLoc.loc term or partial term
#
# The .loc file is tab separated: build, dataset label, data file path.
# Only rows labelled 'disease associated genes' are considered.
# The data file is tab separated:
#   chrom start end strand geneName geneID disease
# Rows whose disease column contains any of the given words
# (case-insensitive, literal match) are copied to the output file.
# The single term 'disease' selects every row.
#
# Exit status is 0 on success and 1 on a usage or lookup error;
# diagnostics are written to STDERR.
##################################################################

# Split the raw command-line term(s) into individual search words.
# All term arguments are joined first so that both a single quoted
# argument ("'breast cancer'") and several bare arguments work.
sub _term_words {
    my @raw_terms = grep { defined } @_;
    my $joined = join(' ', @raw_terms);
    $joined =~ s/^'//;    #remove quotes protecting from shell
    $joined =~ s/'$//;
    # split ' ' discards leading whitespace and never yields empty words,
    # so no empty alternative (which would match everything) can arise.
    return split(' ', $joined);
}

# Build a case-insensitive regex that matches any of the given words.
# Words are escaped with quotemeta so characters such as '(' or '+'
# in a disease name are matched literally instead of breaking the regex.
sub _build_term_pattern {
    my @words = @_;
    my $alternation = join('|', map { quotemeta } @words);    #use OR between words
    return qr/$alternation/i;
}

# Look up the data file for $build in the .loc file.
# Returns the path from the last matching row, or undef when none matches.
# Dies if the .loc file cannot be opened or closed.
sub _find_data_file {
    my ($loc_file, $build) = @_;
    my $data;

    open(my $loc_fh, '<', $loc_file) or die "Couldn't open $loc_file, $!\n";
    while (my $line = <$loc_fh>) {
        chomp $line;
        next if $line =~ /^\s*#/;
        my @f = split(/\t/, $line);
        next if @f < 3;    # blank or incomplete row: nothing usable
        if ($f[0] eq $build and $f[1] eq 'disease associated genes') {
            $data = $f[2];
        }
    }
    close $loc_fh or die "Couldn't close $loc_file, $!\n";

    return $data;
}

# Copy the rows of $data_file whose disease column (7th field) matches
# $pattern to $out_file. When $select_all is true every row is copied.
# Dies on any open/close failure.
sub _write_matching_genes {
    my ($data_file, $out_file, $pattern, $select_all) = @_;

    # Open the input first so a bad data path does not clobber the output.
    open(my $data_fh, '<', $data_file)
        or die "Couldn't open data file $data_file, $!\n";
    open(my $out_fh, '>', $out_file) or die "Couldn't open $out_file, $!\n";

    while (my $line = <$data_fh>) {
        chomp $line;
        my @f = split(/\t/, $line);    #chrom start end strand geneName geneID disease
        my $disease = $f[6];
        if ($select_all or (defined $disease and $disease =~ $pattern)) {
            print {$out_fh} join("\t", @f), "\n";
        }
    }

    close $data_fh or die "Couldn't close data file $data_file, $!\n";
    close $out_fh or die "Couldn't close $out_file, $!\n";
    return;
}

# Entry point. Arguments: build, output file, .loc file, then one or
# more disease terms. Returns the process exit status (0 ok, 1 error).
sub main {
    my @args = @_;

    if (@args < 3) {
        print STDERR "usage: disease_ontology_gene_selector.pl build outfile.txt sourceFile.loc [list of terms]\n";
        return 1;
    }

    my ($build, $out, $in, @raw_terms) = @args;

    # Validate the term before touching any file.
    my @words = _term_words(@raw_terms);
    if (!@words) {
        print STDERR "No disease term entered\n";
        return 1;
    }

    my $data = _find_data_file($in, $build);
    if (!defined $data or $data eq '') {
        print STDERR "Error $build not found in $in\n";
        return 1;
    }

    #start with just fuzzy term matches
    my $pattern = _build_term_pattern(@words);
    my $select_all = (@words == 1 and $words[0] eq 'disease');    #print all with disease
    _write_matching_genes($data, $out, $pattern, $select_all);

    return 0;
}

# Run only when executed as a script, so the file can also be loaded
# (require/do) without side effects.
exit(main(@ARGV)) unless caller;

1;