"""Split an IMGT/GENE-DB FASTA stream into one FASTA file per system.

Reads FASTA records from standard input.  A record is kept when its header
line mentions "Homo sapiens" and one of V-REGION, D-REGION or J-REGION, and
when the first four characters of the second '|'-separated header field
(e.g. 'TRGV' for 'TRGV1*01') start with 'IG' or 'TR'.  Kept records (header
and the lines that follow it, up to the next header) are written to
'<system>.fa' in the current working directory; every other record is
dropped.  The name of each output file is printed when it is first created.

Usage: python <this script> < IMGT-GENE-DB.fasta
"""

import sys

# Parse lines in IMGT/GENE-DB such as:
# >M12949|TRGV1*01|Homo sapiens|ORF|...

# Output handles, keyed by the four-letter system name (e.g. 'TRGV', 'IGHV').
open_files = {}
# File receiving the record currently being read; None while skipping a record.
current_file = None

try:
    for line in sys.stdin:

        if ">" in line:
            # New record: stop writing until this header proves to be wanted.
            current_file = None
            if "Homo sapiens" in line and ("V-REGION" in line or "D-REGION" in line or "J-REGION" in line):
                # Second header field is the allele name; its first four
                # characters identify the system.
                system = line.split('|')[1][:4]
                if system.startswith(('IG', 'TR')):

                    if system in open_files:
                        current_file = open_files[system]
                    else:
                        name = '%s.fa' % system
                        # Single pre-formatted argument: prints identically
                        # under Python 2 and Python 3.
                        print("  ==> %s" % name)
                        current_file = open(name, 'w')
                        open_files[system] = current_file

        # Header and sequence lines alike go to the file selected by the most
        # recent header; nothing is written while current_file is None.
        if current_file:
            current_file.write(line)
finally:
    # Close (and thereby flush) every output file, even if reading or
    # writing failed part-way through.
    for handle in open_files.values():
        handle.close()