comparison mergeXenaMutation.py @ 55:1093078e7976

Make the mutation data merge conform to the new mutationVector data standard
author jingchunzhu
date Fri, 18 Sep 2015 10:24:39 -0700
parents 9806198df91f
children
comparison
equal deleted inserted replaced
54:59dbe857f5d4 55:1093078e7976
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 2
3 import argparse 3 import argparse
4 import string, os, sys 4 import string, os, sys
5 5
6 requiredCOLs = ["chr", "start","end","reference","alt","gene","effect"] 6 requiredCOLs = ["chr", "start","end","reference","alt"]
7 7
8 def headerError(filename, column, ferror): 8 def headerError(filename, column, ferror):
9 ferror.write(filename +" does not have column " + column+"\n") 9 ferror.write(filename +" does not have column " + column+"\n")
10 ferror.close() 10 ferror.close()
11 sys.exit(1) 11 sys.exit(1)
24 #header 24 #header
25 line = fin.readline() 25 line = fin.readline()
26 fin.close() 26 fin.close()
27 if line [0]=="#": 27 if line [0]=="#":
28 line = line[1:-1] 28 line = line[1:-1]
29 data = string.split(line,"\t") 29 data = string.split(string.strip(line),"\t")
30 30
31 columnDic["chr"]= findAnyValueInList (["chr","chrom"], data) 31 columnDic["chr"]= findAnyValueInList (["chr","chrom", "Chr"], data)
32 if columnDic["chr"] ==-1: 32 if columnDic["chr"] ==-1:
33 headerError(infile, "chr", ferror) 33 headerError(infile, "chr", ferror)
34 34
35 columnDic["start"]= findAnyValueInList (["start","chrStart"], data) 35 columnDic["start"]= findAnyValueInList (["start","chrStart","Start"], data)
36 if columnDic["start"] == -1: 36 if columnDic["start"] == -1:
37 headerError(infile, "start", ferror) 37 headerError(infile, "start", ferror)
38 38
39 columnDic["end"]= findAnyValueInList (["end","chrEnd"], data) 39 columnDic["end"]= findAnyValueInList (["end","chrEnd", "End"], data)
40 if columnDic["end"] == -1: 40 if columnDic["end"] == -1:
41 headerError(infile, "end", ferror) 41 headerError(infile, "end", ferror)
42 42
43 columnDic["alt"]= findAnyValueInList (["alt"], data) 43 columnDic["alt"]= findAnyValueInList (["alt","Alt"], data)
44 if columnDic["alt"] == -1: 44 if columnDic["alt"] == -1:
45 headerError(infile, "alt", ferror) 45 headerError(infile, "alt", ferror)
46 46
47 columnDic["reference"]= findAnyValueInList (["reference","ref"], data) 47 columnDic["reference"]= findAnyValueInList (["reference","ref","Reference","Ref"], data)
48 if columnDic["reference"] == -1: 48 if columnDic["reference"] == -1:
49 headerError(infile, "reference", ferror) 49 headerError(infile, "reference", ferror)
50 50
51 columnDic["gene"]= findAnyValueInList (["gene"], data) 51 #columnDic["gene"]= findAnyValueInList (["gene","Gene"], data)
52 if columnDic["gene"] == -1: 52 #if columnDic["gene"] == -1:
53 headerError(infile, "gene", ferror) 53 # headerError(infile, "gene", ferror)
54 54
55 columnDic["effect"]= findAnyValueInList (["effect"], data) 55 #columnDic["effect"]= findAnyValueInList (["effect"], data)
56 if columnDic["effect"] == -1: 56 #if columnDic["effect"] == -1:
57 headerError(infile, "effect", ferror) 57 # headerError(infile, "effect", ferror)
58 58
59 requiredCols = columnDic.keys() 59 requiredCols = columnDic.keys()
60 requiredColsPos = columnDic.values() 60 requiredColsPos = columnDic.values()
61 for i in range(1,len(data)): 61 for i in range(1,len(data)):
62 if i not in requiredColsPos: 62 if i not in requiredColsPos:
72 if col not in allCols: 72 if col not in allCols:
73 allCols.append(col) 73 allCols.append(col)
74 return 74 return
75 75
76 def outputHeader (requiredCOLs,allCols,fout): 76 def outputHeader (requiredCOLs,allCols,fout):
77 fout.write("#sample") 77 fout.write("sample")
78 for col in requiredCOLs: 78 for col in requiredCOLs:
79 fout.write("\t"+col) 79 fout.write("\t"+col)
80 for col in allCols: 80 for col in allCols:
81 if col not in requiredCOLs: 81 if col not in requiredCOLs:
82 fout.write("\t"+col) 82 fout.write("\t"+col)