comparison imgtconvert.py @ 1:83cb18fb0a87 draft

Uploaded
author davidvanzessen
date Wed, 13 Nov 2013 09:46:37 -0500
parents 14e80e5c3353
children 0544b052af07
comparison
equal deleted inserted replaced
0:14e80e5c3353 1:83cb18fb0a87
1 import pandas as pd 1 import pandas as pd
2 import re 2 import re
3 import argparse 3 import argparse
4 import os 4 import os
5 5
6 def stop_err( msg, ret=1 ):
7 sys.stderr.write( msg )
8 sys.exit( ret )
6 9
7 #docs.python.org/dev/library/argparse.html 10 #docs.python.org/dev/library/argparse.html
8 parser = argparse.ArgumentParser() 11 parser = argparse.ArgumentParser()
9 parser.add_argument("--input", help="Input folder with files") 12 parser.add_argument("--input", help="Input folder with files")
10 parser.add_argument("--output", help="Output file") 13 parser.add_argument("--output", help="Output file")
17 20
18 added_summary_columns = [u'V-REGION identity %', u'V-REGION identity nt', u'D-REGION reading frame', u'AA JUNCTION', u'Functionality comment', u'Sequence'] 21 added_summary_columns = [u'V-REGION identity %', u'V-REGION identity nt', u'D-REGION reading frame', u'AA JUNCTION', u'Functionality comment', u'Sequence']
19 added_sequence_columns = [u'FR1-IMGT', u'FR2-IMGT', u'FR3-IMGT', u'CDR3-IMGT', u'JUNCTION', u'J-REGION', u'FR4-IMGT'] 22 added_sequence_columns = [u'FR1-IMGT', u'FR2-IMGT', u'FR3-IMGT', u'CDR3-IMGT', u'JUNCTION', u'J-REGION', u'FR4-IMGT']
20 added_junction_columns = [u"P3'V-nt nb", u'N1-REGION-nt nb', u"P5'D-nt nb", u"P3'D-nt nb", u'N2-REGION-nt nb', u"P5'J-nt nb", u"3'V-REGION trimmed-nt nb", u"5'D-REGION trimmed-nt nb", u"3'D-REGION trimmed-nt nb", u"5'J-REGION trimmed-nt nb"] 23 added_junction_columns = [u"P3'V-nt nb", u'N1-REGION-nt nb', u"P5'D-nt nb", u"P3'D-nt nb", u'N2-REGION-nt nb', u"P5'J-nt nb", u"3'V-REGION trimmed-nt nb", u"5'D-REGION trimmed-nt nb", u"3'D-REGION trimmed-nt nb", u"5'J-REGION trimmed-nt nb"]
21 24
25 inputFolder = args.input
22 26
23 dirContents = os.listdir(args.input) 27 dirContents = os.listdir(inputFolder)
24 if len(dirContents) == 1: 28 if len(dirContents) == 1:
25 if os.path.isdir(dirContents[0]): 29 inputFolder = os.path.join(inputFolder, dirContents[0])
26 dirContents = os.listdir(dirContents[0]) 30 if os.path.isdir(inputFolder):
27 files = sorted([os.path.join(args.input, f) for f in dirContents]) 31 print "is dir"
28 32 dirContents = os.listdir(inputFolder)
29 print files 33 files = sorted([os.path.join(inputFolder, f) for f in dirContents])
30 34
31 if len(files) % 3 is not 0: 35 if len(files) % 3 is not 0:
32 print "Files in zip not a multiple of 3, it should contain the all the 1_, 5_ and 6_ files for a sample" 36 stop_err("Files in zip not a multiple of 3, it should contain the all the 1_, 5_ and 6_ files for a sample")
33 import sys 37 import sys
34 sys.exit() 38 sys.exit()
35 39
36 triplets = [] 40 triplets = []
37 step = len(files) / 3 41 step = len(files) / 3