| 0 | 1 import sys | 
|  | 2 import pandas as pd | 
|  | 3 | 
def _read_table(path):
    """Load one tab-separated result file with every column kept as text.

    Malformed rows are skipped with a warning instead of aborting the run.
    """
    try:
        # Keyword understood by current pandas releases.
        return pd.read_csv(path, sep="\t", dtype=object, on_bad_lines="warn")
    except TypeError:
        # Older pandas rejects the unknown keyword; fall back to the legacy flag.
        return pd.read_csv(path, sep="\t", dtype=object, error_bad_lines=False)


def main():
    """Merge per-sample result tables into a single tab-separated file.

    Command line layout, as read from ``sys.argv``:

    * ``sys.argv[1]``    -- id of the first sample.
    * ``sys.argv[2:-2]`` -- a mix of file paths and sample ids. An argument
      containing "/" is loaded as a table of the current sample; any other
      argument closes the current sample and starts a new one with that id.
    * ``sys.argv[-1]``   -- path the merged table is written to.

    ``sys.argv[-2]`` is not read by this function.

    Tables that have a "Functionality" column get their "VDJ Frame" set to
    "In-frame with stop codon" on every row whose functionality is not
    "productive". If at least one table lacks that column, the output is
    restricted to the reduced column set shared by both table kinds. Each
    output row is tagged with its sample id ("Sample") and the 1-based
    position of its file within that sample ("Replicate").

    Raises:
        ValueError: if no input table was given, so there is nothing to merge.
    """
    patients = {}
    sample_id = sys.argv[1]
    files = []
    blast_files = 0

    # Group the loaded tables by the sample id that precedes them.
    for arg in sys.argv[2:-2]:
        if "/" not in arg:
            # Not a path: store what was collected and switch to the new sample.
            patients[sample_id] = files
            files = []
            sample_id = arg
            continue
        df = _read_table(arg)
        if "Functionality" in df.columns:
            # Single .loc assignment: chained indexing may modify a temporary copy.
            not_productive = df["Functionality"] != "productive"
            df.loc[not_productive, "VDJ Frame"] = "In-frame with stop codon"
        else:
            blast_files += 1
        files.append(df)
    patients[sample_id] = files

    shared_columns = [
        "ID", "VDJ Frame", "Top V Gene", "Top D Gene", "Top J Gene",
        "CDR1 Seq", "CDR1 Length", "CDR2 Seq", "CDR2 Length",
        "CDR3 Seq", "CDR3 Length", "CDR3 Seq DNA", "CDR3 Length DNA",
        "Strand", "CDR3 Found How",
    ]
    extended_columns = [
        "Functionality", "V-REGION identity %", "V-REGION identity nt",
        "D-REGION reading frame", "AA JUNCTION", "Functionality comment",
        "Sequence", "FR1-IMGT", "FR2-IMGT", "FR3-IMGT", "CDR3-IMGT",
        "JUNCTION", "J-REGION", "FR4-IMGT", "P3V-nt nb", "N1-REGION-nt nb",
        "P5D-nt nb", "P3D-nt nb", "N2-REGION-nt nb", "P5J-nt nb",
        "3V-REGION trimmed-nt nb", "5D-REGION trimmed-nt nb",
        "3D-REGION trimmed-nt nb", "5J-REGION trimmed-nt nb",
    ]
    tag_columns = ["Sample", "Replicate"]

    if blast_files != 0:
        print("Has a parsed blastn file, using limited columns.")
        columns = shared_columns + tag_columns
    else:
        columns = shared_columns + extended_columns + tag_columns
        # The first table of the last sample decides whether the optional
        # column is emitted; it goes directly in front of "N1-REGION-nt nb".
        if files and "N-REGION-nt nb" in files[0].columns:
            columns.insert(columns.index("N1-REGION-nt nb"), "N-REGION-nt nb")

    frames = []
    for patient_id, samples in patients.items():
        for replicate, sample in enumerate(samples, 1):
            sample["Sample"] = patient_id
            sample["Replicate"] = str(replicate)
            frames.append(sample[columns])

    if not frames:
        raise ValueError("no input tables were given; nothing to merge")

    # One concatenation instead of repeated appends, which copy the data each time.
    pd.concat(frames).to_csv(sys.argv[-1], sep="\t", index=False)
|  | 48 | 
# Run the merge only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()