Mercurial > repos > devteam > weightedaverage
comparison WeightedAverage.py @ 0:e11314245c2a draft
Imported from capsule None
author | devteam |
---|---|
date | Tue, 01 Apr 2014 09:11:55 -0400 |
parents | |
children | 68a40b074399 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e11314245c2a |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 usage: %prog bed_file_1 bed_file_2 out_file | |
4 -1, --cols1=N,N,N,N: Columns for chr, start, end, strand in first file | |
5 -2, --cols2=N,N,N,N,N: Columns for chr, start, end, strand, name/value in second file | |
6 """ | |
7 | |
8 import collections | |
9 import sys | |
10 #import numpy | |
11 from galaxy import eggs | |
12 import pkg_resources | |
13 pkg_resources.require( "bx-python" ) | |
14 from galaxy.tools.util.galaxyops import * | |
15 from bx.cookbook import doc_optparse | |
16 | |
17 | |
18 #export PYTHONPATH=~/galaxy/lib/ | |
19 #running command python WeightedAverage.py interval_interpolate.bed value_interpolate.bed interpolate_result.bed | |
20 | |
21 def stop_err(msg): | |
22 sys.stderr.write(msg) | |
23 sys.exit() | |
24 | |
25 | |
26 def FindRate(chromosome, start_stop, dictType): | |
27 OverlapList = [] | |
28 for tempO in dictType[chromosome]: | |
29 DatabaseInterval = [tempO[0], tempO[1]] | |
30 Overlap = GetOverlap( start_stop, DatabaseInterval ) | |
31 if Overlap > 0: | |
32 OverlapList.append([Overlap, tempO[2]]) | |
33 | |
34 if len(OverlapList) > 0: | |
35 SumRecomb = 0 | |
36 SumOverlap = 0 | |
37 for member in OverlapList: | |
38 SumRecomb += member[0]*member[1] | |
39 SumOverlap += member[0] | |
40 averageRate = SumRecomb/SumOverlap | |
41 return averageRate | |
42 else: | |
43 return 'NA' | |
44 | |
45 | |
46 def GetOverlap(a, b): | |
47 return min(a[1], b[1])-max(a[0], b[0]) | |
48 | |
49 | |
50 options, args = doc_optparse.parse( __doc__ ) | |
51 | |
52 try: | |
53 chr_col_1, start_col_1, end_col_1, strand_col1 = parse_cols_arg( options.cols1 ) | |
54 chr_col_2, start_col_2, end_col_2, strand_col2, name_col_2 = parse_cols_arg( options.cols2 ) | |
55 input1, input2, input3 = args | |
56 except Exception, eee: | |
57 print eee | |
58 stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." ) | |
59 | |
60 fd2 = open(input2) | |
61 lines2 = fd2.readlines() | |
62 RecombChrDict = collections.defaultdict(list) | |
63 | |
64 skipped = 0 | |
65 for line in lines2: | |
66 temp = line.strip().split() | |
67 try: | |
68 assert float(temp[int(name_col_2)]) | |
69 except: | |
70 skipped += 1 | |
71 continue | |
72 tempIndex = [int(temp[int(start_col_2)]), int(temp[int(end_col_2)]), float(temp[int(name_col_2)])] | |
73 RecombChrDict[temp[int(chr_col_2)]].append(tempIndex) | |
74 | |
75 print "Skipped %d features with invalid values" % (skipped) | |
76 | |
77 fd1 = open(input1) | |
78 lines = fd1.readlines() | |
79 finalProduct = '' | |
80 for line in lines: | |
81 temp = line.strip().split('\t') | |
82 chromosome = temp[int(chr_col_1)] | |
83 start = int(temp[int(start_col_1)]) | |
84 stop = int(temp[int(end_col_1)]) | |
85 start_stop = [start, stop] | |
86 RecombRate = FindRate( chromosome, start_stop, RecombChrDict ) | |
87 try: | |
88 RecombRate = "%.4f" % (float(RecombRate)) | |
89 except: | |
90 RecombRate = RecombRate | |
91 finalProduct += line.strip()+'\t'+str(RecombRate)+'\n' | |
92 fdd = open(input3, 'w') | |
93 fdd.writelines(finalProduct) | |
94 fdd.close() |