Mercurial > repos > melissacline > ucsc_cancer_utilities
comparison mergeXenaMutation.py @ 55:1093078e7976
Merge mutation data so that it conforms to the new mutationVector data standard
author | jingchunzhu |
---|---|
date | Fri, 18 Sep 2015 10:24:39 -0700 |
parents | 9806198df91f |
children |
comparison
equal
deleted
inserted
replaced
54:59dbe857f5d4 | 55:1093078e7976 |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 | 2 |
3 import argparse | 3 import argparse |
4 import string, os, sys | 4 import string, os, sys |
5 | 5 |
6 requiredCOLs = ["chr", "start","end","reference","alt","gene","effect"] | 6 requiredCOLs = ["chr", "start","end","reference","alt"] |
7 | 7 |
8 def headerError(filename, column, ferror): | 8 def headerError(filename, column, ferror): |
9 ferror.write(filename +" does not have column " + column+"\n") | 9 ferror.write(filename +" does not have column " + column+"\n") |
10 ferror.close() | 10 ferror.close() |
11 sys.exit(1) | 11 sys.exit(1) |
24 #header | 24 #header |
25 line = fin.readline() | 25 line = fin.readline() |
26 fin.close() | 26 fin.close() |
27 if line [0]=="#": | 27 if line [0]=="#": |
28 line = line[1:-1] | 28 line = line[1:-1] |
29 data = string.split(line,"\t") | 29 data = string.split(string.strip(line),"\t") |
30 | 30 |
31 columnDic["chr"]= findAnyValueInList (["chr","chrom"], data) | 31 columnDic["chr"]= findAnyValueInList (["chr","chrom", "Chr"], data) |
32 if columnDic["chr"] ==-1: | 32 if columnDic["chr"] ==-1: |
33 headerError(infile, "chr", ferror) | 33 headerError(infile, "chr", ferror) |
34 | 34 |
35 columnDic["start"]= findAnyValueInList (["start","chrStart"], data) | 35 columnDic["start"]= findAnyValueInList (["start","chrStart","Start"], data) |
36 if columnDic["start"] == -1: | 36 if columnDic["start"] == -1: |
37 headerError(infile, "start", ferror) | 37 headerError(infile, "start", ferror) |
38 | 38 |
39 columnDic["end"]= findAnyValueInList (["end","chrEnd"], data) | 39 columnDic["end"]= findAnyValueInList (["end","chrEnd", "End"], data) |
40 if columnDic["end"] == -1: | 40 if columnDic["end"] == -1: |
41 headerError(infile, "end", ferror) | 41 headerError(infile, "end", ferror) |
42 | 42 |
43 columnDic["alt"]= findAnyValueInList (["alt"], data) | 43 columnDic["alt"]= findAnyValueInList (["alt","Alt"], data) |
44 if columnDic["alt"] == -1: | 44 if columnDic["alt"] == -1: |
45 headerError(infile, "alt", ferror) | 45 headerError(infile, "alt", ferror) |
46 | 46 |
47 columnDic["reference"]= findAnyValueInList (["reference","ref"], data) | 47 columnDic["reference"]= findAnyValueInList (["reference","ref","Reference","Ref"], data) |
48 if columnDic["reference"] == -1: | 48 if columnDic["reference"] == -1: |
49 headerError(infile, "reference", ferror) | 49 headerError(infile, "reference", ferror) |
50 | 50 |
51 columnDic["gene"]= findAnyValueInList (["gene"], data) | 51 #columnDic["gene"]= findAnyValueInList (["gene","Gene"], data) |
52 if columnDic["gene"] == -1: | 52 #if columnDic["gene"] == -1: |
53 headerError(infile, "gene", ferror) | 53 # headerError(infile, "gene", ferror) |
54 | 54 |
55 columnDic["effect"]= findAnyValueInList (["effect"], data) | 55 #columnDic["effect"]= findAnyValueInList (["effect"], data) |
56 if columnDic["effect"] == -1: | 56 #if columnDic["effect"] == -1: |
57 headerError(infile, "effect", ferror) | 57 # headerError(infile, "effect", ferror) |
58 | 58 |
59 requiredCols = columnDic.keys() | 59 requiredCols = columnDic.keys() |
60 requiredColsPos = columnDic.values() | 60 requiredColsPos = columnDic.values() |
61 for i in range(1,len(data)): | 61 for i in range(1,len(data)): |
62 if i not in requiredColsPos: | 62 if i not in requiredColsPos: |
72 if col not in allCols: | 72 if col not in allCols: |
73 allCols.append(col) | 73 allCols.append(col) |
74 return | 74 return |
75 | 75 |
76 def outputHeader (requiredCOLs,allCols,fout): | 76 def outputHeader (requiredCOLs,allCols,fout): |
77 fout.write("#sample") | 77 fout.write("sample") |
78 for col in requiredCOLs: | 78 for col in requiredCOLs: |
79 fout.write("\t"+col) | 79 fout.write("\t"+col) |
80 for col in allCols: | 80 for col in allCols: |
81 if col not in requiredCOLs: | 81 if col not in requiredCOLs: |
82 fout.write("\t"+col) | 82 fout.write("\t"+col) |