weightedaverage: WeightedAverage.py comparison

comparison WeightedAverage.py @ 0:e11314245c2a draft

Imported from capsule None

author	devteam
date	Tue, 01 Apr 2014 09:11:55 -0400
parents
children	68a40b074399

comparison

equal deleted inserted replaced

--1:000000000000
+:e11314245c2a
+#!/usr/bin/env python
+"""
+usage: %prog bed_file_1 bed_file_2 out_file
+-1, --cols1=N,N,N,N: Columns for chr, start, end, strand in first file
+-2, --cols2=N,N,N,N,N: Columns for chr, start, end, strand, name/value in second file
+"""
+import collections
+import sys
+#import numpy
+from galaxy import eggs
+import pkg_resources
+pkg_resources.require( "bx-python" )
+from galaxy.tools.util.galaxyops import *
+from bx.cookbook import doc_optparse
+#export PYTHONPATH=~/galaxy/lib/
+#running command python WeightedAverage.py interval_interpolate.bed value_interpolate.bed interpolate_result.bed
+def stop_err(msg):
+sys.stderr.write(msg)
+sys.exit()
+def FindRate(chromosome, start_stop, dictType):
+OverlapList = []
+for tempO in dictType[chromosome]:
+DatabaseInterval = [tempO[0], tempO[1]]
+Overlap = GetOverlap( start_stop, DatabaseInterval )
+if Overlap > 0:
+OverlapList.append([Overlap, tempO[2]])
+if len(OverlapList) > 0:
+SumRecomb = 0
+SumOverlap = 0
+for member in OverlapList:
+SumRecomb += member[0]*member[1]
+SumOverlap += member[0]
+averageRate = SumRecomb/SumOverlap
+return averageRate
+else:
+return 'NA'
+def GetOverlap(a, b):
+return min(a[1], b[1])-max(a[0], b[0])
+options, args = doc_optparse.parse( __doc__ )
+try:
+chr_col_1, start_col_1, end_col_1, strand_col1 = parse_cols_arg( options.cols1 )
+chr_col_2, start_col_2, end_col_2, strand_col2, name_col_2 = parse_cols_arg( options.cols2 )
+input1, input2, input3 = args
+except Exception, eee:
+print eee
+stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." )
+fd2 = open(input2)
+lines2 = fd2.readlines()
+RecombChrDict = collections.defaultdict(list)
+skipped = 0
+for line in lines2:
+temp = line.strip().split()
+try:
+assert float(temp[int(name_col_2)])
+except:
+skipped += 1
+continue
+tempIndex = [int(temp[int(start_col_2)]), int(temp[int(end_col_2)]), float(temp[int(name_col_2)])]
+RecombChrDict[temp[int(chr_col_2)]].append(tempIndex)
+print "Skipped %d features with invalid values" % (skipped)
+fd1 = open(input1)
+lines = fd1.readlines()
+finalProduct = ''
+for line in lines:
+temp = line.strip().split('\t')
+chromosome = temp[int(chr_col_1)]
+start = int(temp[int(start_col_1)])
+stop = int(temp[int(end_col_1)])
+start_stop = [start, stop]
+RecombRate = FindRate( chromosome, start_stop, RecombChrDict )
+try:
+RecombRate = "%.4f" % (float(RecombRate))
+except:
+RecombRate = RecombRate
+finalProduct += line.strip()+'\t'+str(RecombRate)+'\n'
+fdd = open(input3, 'w')
+fdd.writelines(finalProduct)
+fdd.close()

Mercurial > repos > devteam > weightedaverage

comparison WeightedAverage.py @ 0:e11314245c2a draft