diff mergeXenaMutation.py @ 55:1093078e7976

merge mutation data: conform to the new mutationVector data standard
author jingchunzhu
date Fri, 18 Sep 2015 10:24:39 -0700
parents 9806198df91f
children
line wrap: on
line diff
--- a/mergeXenaMutation.py	Thu Sep 17 22:03:04 2015 -0700
+++ b/mergeXenaMutation.py	Fri Sep 18 10:24:39 2015 -0700
@@ -3,7 +3,7 @@
 import argparse
 import string, os, sys
 
-requiredCOLs = ["chr", "start","end","reference","alt","gene","effect"]
+requiredCOLs = ["chr", "start","end","reference","alt"]
 
 def headerError(filename, column, ferror):
   ferror.write(filename +" does not have column " + column+"\n")
@@ -26,35 +26,35 @@
   fin.close()
   if line [0]=="#":
     line = line[1:-1]
-  data = string.split(line,"\t")
+  data = string.split(string.strip(line),"\t")
 
-  columnDic["chr"]= findAnyValueInList (["chr","chrom"], data)
+  columnDic["chr"]= findAnyValueInList (["chr","chrom", "Chr"], data)
   if columnDic["chr"] ==-1:
     headerError(infile, "chr", ferror)
 
-  columnDic["start"]= findAnyValueInList (["start","chrStart"], data)
+  columnDic["start"]= findAnyValueInList (["start","chrStart","Start"], data)
   if columnDic["start"] == -1:
     headerError(infile, "start", ferror)
 
-  columnDic["end"]= findAnyValueInList (["end","chrEnd"], data)
+  columnDic["end"]= findAnyValueInList (["end","chrEnd", "End"], data)
   if columnDic["end"] == -1:
     headerError(infile, "end", ferror)
 
-  columnDic["alt"]= findAnyValueInList (["alt"], data)
+  columnDic["alt"]= findAnyValueInList (["alt","Alt"], data)
   if columnDic["alt"] == -1:
     headerError(infile, "alt", ferror)
 
-  columnDic["reference"]= findAnyValueInList (["reference","ref"], data)
+  columnDic["reference"]= findAnyValueInList (["reference","ref","Reference","Ref"], data)
   if columnDic["reference"] == -1:
     headerError(infile, "reference", ferror)
 
-  columnDic["gene"]= findAnyValueInList (["gene"], data)
-  if columnDic["gene"] == -1:
-    headerError(infile, "gene", ferror)
+  #columnDic["gene"]= findAnyValueInList (["gene","Gene"], data)
+  #if columnDic["gene"] == -1:
+  #  headerError(infile, "gene", ferror)
 
-  columnDic["effect"]= findAnyValueInList (["effect"], data)
-  if columnDic["effect"] == -1:
-    headerError(infile, "effect", ferror)
+  #columnDic["effect"]= findAnyValueInList (["effect"], data)
+  #if columnDic["effect"] == -1:
+  #  headerError(infile, "effect", ferror)
 
   requiredCols = columnDic.keys()
   requiredColsPos = columnDic.values()
@@ -74,7 +74,7 @@
   return
 
 def outputHeader (requiredCOLs,allCols,fout):
-  fout.write("#sample")
+  fout.write("sample")
   for col in requiredCOLs:
     fout.write("\t"+col)
   for col in allCols: