annotate disease_ontology_gene_fuzzy_selector.pl @ 0:6b09d2ed034f draft

Uploaded tool tarball.
author devteam
date Tue, 20 Aug 2013 10:55:38 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
6b09d2ed034f Uploaded tool tarball.
devteam
parents:
diff changeset
#!/usr/bin/env perl

use strict;
use warnings;

##################################################################
# Select genes that are associated with the diseases listed in the
# disease ontology.
# ontology: http://do-wiki.nubic.northwestern.edu/index.php/Main_Page
# gene associations by FunDO: http://projects.bioinformatics.northwestern.edu/do_rif/
# Sept 2010, switch to doLite
# input: build outfile sourceFileLoc.loc term or partial term
#
# Matching is "fuzzy": the term is split into words and a row is
# selected when its disease column contains ANY of the words,
# case-insensitively. Words are matched literally, not as regexes.
# Only the first argument after the .loc file is used as the term.
##################################################################

# Split the raw term into its search words.
#   $term - term as passed on the command line; may be undef and may
#           still carry the single quotes that protected it from the shell.
# Returns the list of non-empty words (empty list when there is no term).
sub _term_words {
    my ($term) = @_;
    return if !defined $term;

    $term =~ s/^'//;    #remove quotes protecting from shell
    $term =~ s/'$//;

    # split ' ' skips leading whitespace, so no empty words are produced;
    # an empty word would become an empty alternative that matches every row.
    return split(' ', $term);
}

# Build the case-insensitive "any of these words" pattern.
#   @words - non-empty search words.
# Returns a compiled regex. Each word is escaped with quotemeta so that
# user input such as "(" cannot break or alter the pattern.
sub _term_pattern {
    my @words = @_;
    my $alternation = join('|', map { quotemeta } @words);
    return qr/(?:$alternation)/i;
}

# Look up the gene data file for a build in the .loc file.
#   $loc_file - tab-separated file: build, description, path
#   $build    - build name to look for in the first column
# Returns the third column of the last line whose first column equals
# $build and whose second column is 'disease associated genes', or undef
# when there is no such line. Dies if the file cannot be opened or closed.
sub _find_data_file {
    my ($loc_file, $build) = @_;
    my $data;

    open(my $loc_fh, '<', $loc_file) or die "Couldn't open $loc_file, $!\n";
    while (my $line = <$loc_fh>) {
        chomp $line;
        next if $line =~ /^\s*#/;
        my @f = split(/\t/, $line);
        # blank or short lines cannot name a build; skip them quietly
        next if !defined $f[0] or !defined $f[1];
        if ($f[0] eq $build and $f[1] eq 'disease associated genes') {
            $data = $f[2];
        }
    }
    close $loc_fh or die "Couldn't close $loc_file, $!\n";

    return $data;
}

# Entry point.
#   @args - build, output file, .loc file, disease term
# Writes the selected rows to the output file. Usage and lookup problems
# are reported on STDOUT and still return 0; I/O failures die.
# Returns the process exit status.
sub main {
    my @args = @_;

    if (@args < 3) {
        print "usage: disease_ontology_gene_fuzzy_selector.pl build outfile.txt sourceFile.loc [list of terms]\n";
        return 0;
    }
    my ($build, $out, $in, $term) = @args;

    my $data = _find_data_file($in, $build);
    if (!$data) {
        print "Error $build not found in $in\n";
        return 0;
    }

    my @words = _term_words($term);
    if (!@words) {
        print "No disease term entered\n";
        return 0;
    }

    # The bare term 'disease' selects every row, whatever its disease column.
    my $print_all = (@words == 1 and $words[0] eq 'disease');
    my $pattern   = _term_pattern(@words);

    #start with just fuzzy term matches
    open(my $out_fh, '>', $out) or die "Couldn't open $out, $!\n";
    open(my $data_fh, '<', $data) or die "Couldn't open data file $data, $!\n";
    while (my $line = <$data_fh>) {
        chomp $line;
        my @f = split(/\t/, $line); #chrom start end strand geneName geneID disease
        my $disease = $f[6];
        # rows without a disease column can only be selected by 'disease'
        if ($print_all or (defined $disease and $disease =~ $pattern)) {
            print {$out_fh} join("\t", @f), "\n";
        }
    }
    close $data_fh or die "Couldn't close data file $data, $!\n";
    close $out_fh or die "Couldn't close $out, $!\n";

    return 0;
}

# Run only when executed as a script, so the subs can be loaded and tested.
exit main(@ARGV) unless caller;

1;