comparison disease_ontology_gene_fuzzy_selector.pl @ 0:6b09d2ed034f draft

Uploaded tool tarball.
author devteam
date Tue, 20 Aug 2013 10:55:38 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:6b09d2ed034f
1 #!/usr/bin/env perl
2
3 use strict;
4 use warnings;
5
6 ##################################################################
7 # Select genes that are associated with the diseases listed in the
8 # disease ontology.
9 # ontology: http://do-wiki.nubic.northwestern.edu/index.php/Main_Page
10 # gene associations by FunDO: http://projects.bioinformatics.northwestern.edu/do_rif/
11 # Sept 2010, switch to doLite
12 # input: build outfile sourceFileLoc.loc term or partial term
13 ##################################################################
14
# Run only when executed as a script, so the subs below can be loaded with
# require/do (e.g. from a test) without touching @ARGV or the filesystem.
exit main(@ARGV) unless caller;

# main(BUILD, OUTFILE, LOC_FILE, [TERM])
#
# Writes to OUTFILE every row of the "disease associated genes" data file
# registered for BUILD in LOC_FILE whose disease column (7th tab-separated
# field) contains any whitespace-separated word of TERM, case-insensitively.
# The single word 'disease' selects every row.
#
# Returns the process exit status.  Usage and lookup problems are reported on
# STDOUT with status 0, exactly as this script has always done; failures to
# open or close a file die.
sub main {
    my @args = @_;

    if (@args < 3) {
        print "usage: disease_ontology_gene_fuzzy_selector.pl build outfile.txt sourceFile.loc [list of terms]\n";
        return 0;
    }
    my ($build, $out, $in, $term) = @args;

    my $data = find_data_file($in, $build);
    if (!$data) {
        print "Error $build not found in $in\n";
        return 0;
    }

    # An absent, empty or whitespace-only term used to compile to an empty
    # alternative that matched every row; treat it as "no term" instead.
    my @words = split_term($term);
    if (!@words) {
        print "No disease term entered\n";
        return 0;
    }

    # Loop-invariant: the lone word 'disease' means "print all with disease".
    my $select_all = (@words == 1 && $words[0] eq 'disease');

    # Words are OR-ed together.  quotemeta keeps characters such as '(' or
    # '+' in a term from being read as regex syntax (and from aborting the
    # script with a pattern compile error).
    my $alternation = join '|', map { quotemeta } @words;
    my $term_re     = qr/$alternation/i;

    #start with just fuzzy term matches
    open my $out_fh, '>', $out or die "Couldn't open $out, $!\n";
    open my $data_fh, '<', $data or die "Couldn't open data file $data, $!\n";
    while (my $line = <$data_fh>) {
        chomp $line;
        my @f = split /\t/, $line;    #chrom start end strand geneName geneID disease
        my $disease = $f[6];          # undef on short or blank rows
        if ($select_all or (defined $disease and $disease =~ $term_re)) {
            print {$out_fh} join("\t", @f), "\n";
        }
    }
    close $data_fh or die "Couldn't close data file $data, $!\n";
    close $out_fh or die "Couldn't close $out, $!\n";

    return 0;
}

# find_data_file(LOC_FILE, BUILD)
#
# Scans the tab-separated LOC_FILE, skipping '#' comment lines, for rows whose
# first field equals BUILD and whose second field is
# 'disease associated genes'.  Returns the third field of the last such row,
# or undef when there is none.  Dies if LOC_FILE cannot be opened or closed.
sub find_data_file {
    my ($loc_file, $build) = @_;

    my $data;
    open my $loc_fh, '<', $loc_file or die "Couldn't open $loc_file, $!\n";
    while (my $line = <$loc_fh>) {
        chomp $line;
        next if $line =~ /^\s*#/;
        my ($loc_build, $type, $path) = split /\t/, $line;
        next if !defined $type;       # blank line or fewer than two fields
        next if $loc_build ne $build or $type ne 'disease associated genes';
        # The former two-argument open ignored whitespace around the path
        # (e.g. a stray "\r" from a CRLF .loc file); trim to stay compatible.
        $path =~ s/^\s+|\s+$//g if defined $path;
        $data = $path;
    }
    close $loc_fh or die "Couldn't close $loc_file, $!\n";

    return $data;
}

# split_term(TERM)
#
# Strips one leading and one trailing single quote (used to protect the term
# from the shell) and returns the term's whitespace-separated words.  Returns
# an empty list when TERM is undef or contains no words.
sub split_term {
    my ($term) = @_;
    return if !defined $term;

    $term =~ s/^'//;    #remove quotes protecting from shell
    $term =~ s/'$//;

    # split ' ' discards leading whitespace, so no empty words are produced.
    return split ' ', $term;
}