Mercurial > repos > devteam > weightedaverage
comparison WeightedAverage.py @ 0:e11314245c2a draft
Imported from capsule None
| author | devteam |
|---|---|
| date | Tue, 01 Apr 2014 09:11:55 -0400 |
| parents | |
| children | 68a40b074399 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:e11314245c2a |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """ | |
| 3 usage: %prog bed_file_1 bed_file_2 out_file | |
| 4 -1, --cols1=N,N,N,N: Columns for chr, start, end, strand in first file | |
| 5 -2, --cols2=N,N,N,N,N: Columns for chr, start, end, strand, name/value in second file | |
| 6 """ | |
| 7 | |
| 8 import collections | |
| 9 import sys | |
| 10 #import numpy | |
| 11 from galaxy import eggs | |
| 12 import pkg_resources | |
| 13 pkg_resources.require( "bx-python" ) | |
| 14 from galaxy.tools.util.galaxyops import * | |
| 15 from bx.cookbook import doc_optparse | |
| 16 | |
| 17 | |
| 18 #export PYTHONPATH=~/galaxy/lib/ | |
| 19 #running command python WeightedAverage.py interval_interpolate.bed value_interpolate.bed interpolate_result.bed | |
| 20 | |
| 21 def stop_err(msg): | |
| 22 sys.stderr.write(msg) | |
| 23 sys.exit() | |
| 24 | |
| 25 | |
| 26 def FindRate(chromosome, start_stop, dictType): | |
| 27 OverlapList = [] | |
| 28 for tempO in dictType[chromosome]: | |
| 29 DatabaseInterval = [tempO[0], tempO[1]] | |
| 30 Overlap = GetOverlap( start_stop, DatabaseInterval ) | |
| 31 if Overlap > 0: | |
| 32 OverlapList.append([Overlap, tempO[2]]) | |
| 33 | |
| 34 if len(OverlapList) > 0: | |
| 35 SumRecomb = 0 | |
| 36 SumOverlap = 0 | |
| 37 for member in OverlapList: | |
| 38 SumRecomb += member[0]*member[1] | |
| 39 SumOverlap += member[0] | |
| 40 averageRate = SumRecomb/SumOverlap | |
| 41 return averageRate | |
| 42 else: | |
| 43 return 'NA' | |
| 44 | |
| 45 | |
| 46 def GetOverlap(a, b): | |
| 47 return min(a[1], b[1])-max(a[0], b[0]) | |
| 48 | |
| 49 | |
| 50 options, args = doc_optparse.parse( __doc__ ) | |
| 51 | |
| 52 try: | |
| 53 chr_col_1, start_col_1, end_col_1, strand_col1 = parse_cols_arg( options.cols1 ) | |
| 54 chr_col_2, start_col_2, end_col_2, strand_col2, name_col_2 = parse_cols_arg( options.cols2 ) | |
| 55 input1, input2, input3 = args | |
| 56 except Exception, eee: | |
| 57 print eee | |
| 58 stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." ) | |
| 59 | |
| 60 fd2 = open(input2) | |
| 61 lines2 = fd2.readlines() | |
| 62 RecombChrDict = collections.defaultdict(list) | |
| 63 | |
| 64 skipped = 0 | |
| 65 for line in lines2: | |
| 66 temp = line.strip().split() | |
| 67 try: | |
| 68 assert float(temp[int(name_col_2)]) | |
| 69 except: | |
| 70 skipped += 1 | |
| 71 continue | |
| 72 tempIndex = [int(temp[int(start_col_2)]), int(temp[int(end_col_2)]), float(temp[int(name_col_2)])] | |
| 73 RecombChrDict[temp[int(chr_col_2)]].append(tempIndex) | |
| 74 | |
| 75 print "Skipped %d features with invalid values" % (skipped) | |
| 76 | |
| 77 fd1 = open(input1) | |
| 78 lines = fd1.readlines() | |
| 79 finalProduct = '' | |
| 80 for line in lines: | |
| 81 temp = line.strip().split('\t') | |
| 82 chromosome = temp[int(chr_col_1)] | |
| 83 start = int(temp[int(start_col_1)]) | |
| 84 stop = int(temp[int(end_col_1)]) | |
| 85 start_stop = [start, stop] | |
| 86 RecombRate = FindRate( chromosome, start_stop, RecombChrDict ) | |
| 87 try: | |
| 88 RecombRate = "%.4f" % (float(RecombRate)) | |
| 89 except: | |
| 90 RecombRate = RecombRate | |
| 91 finalProduct += line.strip()+'\t'+str(RecombRate)+'\n' | |
| 92 fdd = open(input3, 'w') | |
| 93 fdd.writelines(finalProduct) | |
| 94 fdd.close() |
