changeset 1:0d9ef2618d9c draft default tip

Uploaded
author morinlab
date Sun, 04 Dec 2016 16:16:44 -0500
parents 552e16f18578
children
files transform_vcf_to_counts.py
diffstat 1 files changed, 74 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/transform_vcf_to_counts.py	Sun Dec 04 16:16:44 2016 -0500
@@ -0,0 +1,74 @@
+import argparse
+import re
+
+version = '1.0.0'  # not referenced anywhere else in this script
+
+# Command-line interface: input VCF path, output counts path, optional positions file.
+# parse_args() runs at module level (outside the __main__ guard), so importing this file parses the command line.
+parser = argparse.ArgumentParser(prog='''Convert Museq vcf to counts''',
+                                 description = '''Generates the tumour_counts from vcf generated by titan_preprocess component
+                                                 of MutationSeq''')
+
+parser.add_argument("-i","--infile", 
+                    required = True, 
+                    help= '''The vcf files generated by museq''')
+
+parser.add_argument("-o","--outfile", 
+                    required = True, 
+                    help="The path to the counts file")
+
+parser.add_argument("--positions_file",
+                    default = None,
+                    help="If provided, all positions that aren't in the file will be filtered out.")
+args = parser.parse_args()
+
+
+class transformVcfCounts(object):
+    """Write a tab-separated counts table from the data lines of a VCF."""
+
+    def __init__(self):
+        self.args = args  # module-level argparse namespace
+        self.outfile = open(self.args.outfile, 'w')
+        # Header row; the 'Nref' column is always filled with 'X' by main().
+        self.outfile.write('chr\tposition\tref\trefCount\tNref\tNrefCount\n')
+
+    def read_ref_positions(self):
+        """Return the set of (chrom, pos) string tuples parsed from the
+        'chrom:pos' lines of --positions_file, or None when it is unset."""
+        if not self.args.positions_file:
+            return
+
+        # 'with' guarantees the positions file handle is closed.
+        with open(self.args.positions_file) as freader:
+            return set(tuple(line.strip().split(':')) for line in freader)
+
+    def main(self, ref_pos):
+        """Write one counts row per VCF data line, keeping only positions
+        found in ref_pos when ref_pos is non-empty; closes the output file."""
+        try:
+            with open(self.args.infile) as infile_stream:
+                for line in infile_stream:
+                    if line[0] == '#':
+                        continue
+                    line = line.strip().split()
+                    chrom, pos, ref = line[0], line[1], line[3]
+                    # Positional: values of the 2nd and 3rd key=value pairs in column 8.
+                    info = re.split(';|=', line[7])
+                    tr, ta = info[3], info[5]
+
+                    # Per the --positions_file help text, drop positions NOT in the set.
+                    if ref_pos and (chrom, pos) not in ref_pos:
+                        continue
+
+                    outstr = '\t'.join([chrom, pos, ref, tr, 'X', ta]) + '\n'
+                    self.outfile.write(outstr)
+        finally:
+            self.outfile.close()
+        
+#---------------------------------------------
+#  Script entry point
+#---------------------------------------------
+if __name__ == '__main__':
+    # Open the output, load the optional position filter, then convert the VCF.
+    converter = transformVcfCounts()
+    converter.main(converter.read_ref_positions())