# view mutation_column_checker.py @ 5:495a521cf9f2 draft
#
# "planemo upload commit 40d62ec3d2fea3c3bac605c976941e1f3b7e2cd4"
# author rhpvorderman
# date Tue, 16 Nov 2021 11:16:06 +0000
# parents 64d74ba01a7c
# children
# line wrap: on
# line source

import re

# Pattern for a single "|"-separated entry of a mutation column.
# Captured groups, in order:
#   1. one letter from [nactg]
#   2. one or more digits
#      (followed by any single character, not captured)
#   3. one letter from [nactg]
#      (followed by an optional "," and an optional space, not captured)
#   4. optional uppercase letter
#   5. optional digits
#      (followed by an optional ">", not captured)
#   6. optional uppercase letter or ";"
#   7. everything that remains up to a line break (may be empty)
# NOTE(review): groups 1-3 look like a nucleotide change and groups 4-6 like
# the resulting amino-acid change -- confirm against the IMGT table format.
#
# Written as a raw string so that "\d" is passed to the regex engine verbatim
# instead of being treated as an (invalid) string escape sequence.
mutationMatcher = re.compile(r"^([nactg])(\d+).([nactg]),?[ ]?([A-Z])?(\d+)?[>]?([A-Z;])?(.*)?")

# Walk the tab-separated mutation table and report every row whose
# "FR3-IMGT" column contains an entry that mutationMatcher cannot parse.
# The first line is the header and is only used to locate that column;
# list.index raises ValueError if the header has no "FR3-IMGT" column.
with open("7_V-REGION-mutation-and-AA-change-table.txt", 'r') as file_handle:
    fr3_index = -1
    for i, line in enumerate(file_handle):
        # Drop the line terminator first, otherwise the last column carries a
        # trailing "\n" and a header ending in "FR3-IMGT" would not be found.
        line_split = line.rstrip("\n").split("\t")
        if i == 0:
            fr3_index = line_split.index("FR3-IMGT")
            continue

        # A row needs at least fr3_index + 1 columns to be indexable at
        # fr3_index; shorter rows are skipped.
        if len(line_split) <= fr3_index:
            continue

        fr3_data = line_split[fr3_index]
        if len(fr3_data) > 5:
            # Empty entries (e.g. from a trailing "|") are ignored; anything
            # else must match the mutation pattern.
            unparsable = [
                entry for entry in fr3_data.split("|")
                if entry and mutationMatcher.match(entry) is None
            ]
            if unparsable:
                print((line_split[1]))
                print(("Something went wrong at line {line} with:".format(line=line_split[0])))
        # Progress indicator for large files.
        if i % 100000 == 0:
            print(i)