# HG changeset patch # User jingchunzhu # Date 1442597079 25200 # Node ID 1093078e7976bfbf255d94ba78028012e805cfa9 # Parent 59dbe857f5d472815e521d4a5af5e9579e864ac5 merge mutation data conform to new mutationVector data standard diff -r 59dbe857f5d4 -r 1093078e7976 mergeXenaMutation.py --- a/mergeXenaMutation.py Thu Sep 17 22:03:04 2015 -0700 +++ b/mergeXenaMutation.py Fri Sep 18 10:24:39 2015 -0700 @@ -3,7 +3,7 @@ import argparse import string, os, sys -requiredCOLs = ["chr", "start","end","reference","alt","gene","effect"] +requiredCOLs = ["chr", "start","end","reference","alt"] def headerError(filename, column, ferror): ferror.write(filename +" does not have column " + column+"\n") @@ -26,35 +26,35 @@ fin.close() if line [0]=="#": line = line[1:-1] - data = string.split(line,"\t") + data = string.split(string.strip(line),"\t") - columnDic["chr"]= findAnyValueInList (["chr","chrom"], data) + columnDic["chr"]= findAnyValueInList (["chr","chrom", "Chr"], data) if columnDic["chr"] ==-1: headerError(infile, "chr", ferror) - columnDic["start"]= findAnyValueInList (["start","chrStart"], data) + columnDic["start"]= findAnyValueInList (["start","chrStart","Start"], data) if columnDic["start"] == -1: headerError(infile, "start", ferror) - columnDic["end"]= findAnyValueInList (["end","chrEnd"], data) + columnDic["end"]= findAnyValueInList (["end","chrEnd", "End"], data) if columnDic["end"] == -1: headerError(infile, "end", ferror) - columnDic["alt"]= findAnyValueInList (["alt"], data) + columnDic["alt"]= findAnyValueInList (["alt","Alt"], data) if columnDic["alt"] == -1: headerError(infile, "alt", ferror) - columnDic["reference"]= findAnyValueInList (["reference","ref"], data) + columnDic["reference"]= findAnyValueInList (["reference","ref","Reference","Ref"], data) if columnDic["reference"] == -1: headerError(infile, "reference", ferror) - columnDic["gene"]= findAnyValueInList (["gene"], data) - if columnDic["gene"] == -1: - headerError(infile, "gene", ferror) + #columnDic["gene"]= findAnyValueInList (["gene","Gene"], data) + #if columnDic["gene"] == -1: + # headerError(infile, "gene", ferror) - columnDic["effect"]= findAnyValueInList (["effect"], data) - if columnDic["effect"] == -1: - headerError(infile, "effect", ferror) + #columnDic["effect"]= findAnyValueInList (["effect"], data) + #if columnDic["effect"] == -1: + # headerError(infile, "effect", ferror) requiredCols = columnDic.keys() requiredColsPos = columnDic.values() @@ -74,7 +74,7 @@ return def outputHeader (requiredCOLs,allCols,fout): - fout.write("#sample") + fout.write("sample") for col in requiredCOLs: fout.write("\t"+col) for col in allCols: