Mercurial > repos > melissacline > ucsc_cancer_utilities
diff mergeXenaMutation.py @ 55:1093078e7976
merge mutation data conform to new mutationVector data standard
author | jingchunzhu |
---|---|
date | Fri, 18 Sep 2015 10:24:39 -0700 |
parents | 9806198df91f |
children |
line wrap: on
line diff
```diff
--- a/mergeXenaMutation.py Thu Sep 17 22:03:04 2015 -0700
+++ b/mergeXenaMutation.py Fri Sep 18 10:24:39 2015 -0700
@@ -3,7 +3,7 @@
 import argparse
 import string, os, sys

-requiredCOLs = ["chr", "start","end","reference","alt","gene","effect"]
+requiredCOLs = ["chr", "start","end","reference","alt"]

 def headerError(filename, column, ferror):
     ferror.write(filename +" does not have column " + column+"\n")
@@ -26,35 +26,35 @@
     fin.close()
     if line [0]=="#":
         line = line[1:-1]
-    data = string.split(line,"\t")
+    data = string.split(string.strip(line),"\t")

-    columnDic["chr"]= findAnyValueInList (["chr","chrom"], data)
+    columnDic["chr"]= findAnyValueInList (["chr","chrom", "Chr"], data)
     if columnDic["chr"] ==-1:
         headerError(infile, "chr", ferror)

-    columnDic["start"]= findAnyValueInList (["start","chrStart"], data)
+    columnDic["start"]= findAnyValueInList (["start","chrStart","Start"], data)
     if columnDic["start"] == -1:
         headerError(infile, "start", ferror)

-    columnDic["end"]= findAnyValueInList (["end","chrEnd"], data)
+    columnDic["end"]= findAnyValueInList (["end","chrEnd", "End"], data)
     if columnDic["end"] == -1:
         headerError(infile, "end", ferror)

-    columnDic["alt"]= findAnyValueInList (["alt"], data)
+    columnDic["alt"]= findAnyValueInList (["alt","Alt"], data)
     if columnDic["alt"] == -1:
         headerError(infile, "alt", ferror)

-    columnDic["reference"]= findAnyValueInList (["reference","ref"], data)
+    columnDic["reference"]= findAnyValueInList (["reference","ref","Reference","Ref"], data)
     if columnDic["reference"] == -1:
         headerError(infile, "reference", ferror)

-    columnDic["gene"]= findAnyValueInList (["gene"], data)
-    if columnDic["gene"] == -1:
-        headerError(infile, "gene", ferror)
+    #columnDic["gene"]= findAnyValueInList (["gene","Gene"], data)
+    #if columnDic["gene"] == -1:
+    #    headerError(infile, "gene", ferror)

-    columnDic["effect"]= findAnyValueInList (["effect"], data)
-    if columnDic["effect"] == -1:
-        headerError(infile, "effect", ferror)
+    #columnDic["effect"]= findAnyValueInList (["effect"], data)
+    #if columnDic["effect"] == -1:
+    #    headerError(infile, "effect", ferror)

     requiredCols = columnDic.keys()
     requiredColsPos = columnDic.values()
@@ -74,7 +74,7 @@
     return

 def outputHeader (requiredCOLs,allCols,fout):
-    fout.write("#sample")
+    fout.write("sample")
     for col in requiredCOLs:
         fout.write("\t"+col)
     for col in allCols:
```