Mercurial > repos > davidvanzessen > imgt_convert
comparison imgtconvert.py @ 1:83cb18fb0a87 draft
Uploaded
author | davidvanzessen |
---|---|
date | Wed, 13 Nov 2013 09:46:37 -0500 |
parents | 14e80e5c3353 |
children | 0544b052af07 |
comparison
Legend: equal, deleted, inserted, replaced
0:14e80e5c3353 | 1:83cb18fb0a87 |
---|---|
1 import pandas as pd | 1 import pandas as pd |
2 import re | 2 import re |
3 import argparse | 3 import argparse |
4 import os | 4 import os |
5 | 5 |
6 def stop_err( msg, ret=1 ): | |
7 sys.stderr.write( msg ) | |
8 sys.exit( ret ) | |
6 | 9 |
7 #docs.python.org/dev/library/argparse.html | 10 #docs.python.org/dev/library/argparse.html |
8 parser = argparse.ArgumentParser() | 11 parser = argparse.ArgumentParser() |
9 parser.add_argument("--input", help="Input folder with files") | 12 parser.add_argument("--input", help="Input folder with files") |
10 parser.add_argument("--output", help="Output file") | 13 parser.add_argument("--output", help="Output file") |
17 | 20 |
18 added_summary_columns = [u'V-REGION identity %', u'V-REGION identity nt', u'D-REGION reading frame', u'AA JUNCTION', u'Functionality comment', u'Sequence'] | 21 added_summary_columns = [u'V-REGION identity %', u'V-REGION identity nt', u'D-REGION reading frame', u'AA JUNCTION', u'Functionality comment', u'Sequence'] |
19 added_sequence_columns = [u'FR1-IMGT', u'FR2-IMGT', u'FR3-IMGT', u'CDR3-IMGT', u'JUNCTION', u'J-REGION', u'FR4-IMGT'] | 22 added_sequence_columns = [u'FR1-IMGT', u'FR2-IMGT', u'FR3-IMGT', u'CDR3-IMGT', u'JUNCTION', u'J-REGION', u'FR4-IMGT'] |
20 added_junction_columns = [u"P3'V-nt nb", u'N1-REGION-nt nb', u"P5'D-nt nb", u"P3'D-nt nb", u'N2-REGION-nt nb', u"P5'J-nt nb", u"3'V-REGION trimmed-nt nb", u"5'D-REGION trimmed-nt nb", u"3'D-REGION trimmed-nt nb", u"5'J-REGION trimmed-nt nb"] | 23 added_junction_columns = [u"P3'V-nt nb", u'N1-REGION-nt nb', u"P5'D-nt nb", u"P3'D-nt nb", u'N2-REGION-nt nb', u"P5'J-nt nb", u"3'V-REGION trimmed-nt nb", u"5'D-REGION trimmed-nt nb", u"3'D-REGION trimmed-nt nb", u"5'J-REGION trimmed-nt nb"] |
21 | 24 |
25 inputFolder = args.input | |
22 | 26 |
23 dirContents = os.listdir(args.input) | 27 dirContents = os.listdir(inputFolder) |
24 if len(dirContents) == 1: | 28 if len(dirContents) == 1: |
25 if os.path.isdir(dirContents[0]): | 29 inputFolder = os.path.join(inputFolder, dirContents[0]) |
26 dirContents = os.listdir(dirContents[0]) | 30 if os.path.isdir(inputFolder): |
27 files = sorted([os.path.join(args.input, f) for f in dirContents]) | 31 print "is dir" |
28 | 32 dirContents = os.listdir(inputFolder) |
29 print files | 33 files = sorted([os.path.join(inputFolder, f) for f in dirContents]) |
30 | 34 |
31 if len(files) % 3 is not 0: | 35 if len(files) % 3 is not 0: |
32 print "Files in zip not a multiple of 3, it should contain the all the 1_, 5_ and 6_ files for a sample" | 36 stop_err("Files in zip not a multiple of 3, it should contain the all the 1_, 5_ and 6_ files for a sample") |
33 import sys | 37 import sys |
34 sys.exit() | 38 sys.exit() |
35 | 39 |
36 triplets = [] | 40 triplets = [] |
37 step = len(files) / 3 | 41 step = len(files) / 3 |