Mercurial > repos > davidvanzessen > experimental_design_igg

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/experimental_design.py	Mon Jul 07 09:45:10 2014 -0400
@@ -0,0 +1,44 @@
+import sys
+import pandas as pd
+
+def main():
+	"""Merge the parsed report files of all patients into one tab-separated file.
+
+	sys.argv: a patient ID, then its file paths (arguments containing "/"),
+	repeated per patient, followed by the output flag and the output path.
+	"""
+	patients = {}
+	files = []
+	sample_id = sys.argv[1]
+	blast_files = 0
+	# Group the data frames under the patient ID that precedes them in argv.
+	for arg in sys.argv[2:-2]:
+		if "/" not in arg:
+			# Extend instead of overwrite so a repeated patient ID keeps its earlier files.
+			patients.setdefault(sample_id, []).extend(files)
+			files = []
+			sample_id = arg
+		else:
+			df = pd.read_csv(arg, sep="\t")
+			if "Functionality" in df.columns:
+				# .loc assigns on df itself; chained indexing may assign to a temporary copy.
+				df.loc[df["Functionality"] != "productive", "VDJ Frame"] = "In-frame with stop codon"
+			else:
+				blast_files += 1
+			files.append(df)
+	patients.setdefault(sample_id, []).extend(files)
+	columns = [u'ID', u'VDJ Frame', u'Top V Gene', u'Top D Gene', u'Top J Gene', u'CDR1 Seq', u'CDR1 Length', u'CDR2 Seq', u'CDR2 Length', u'CDR3 Seq', u'CDR3 Length', u'CDR3 Seq DNA', u'CDR3 Length DNA', u'Strand', u'CDR3 Found How', u'Functionality', 'V-REGION identity %', 'V-REGION identity nt', 'D-REGION reading frame', 'AA JUNCTION', 'Functionality comment', 'Sequence', 'FR1-IMGT', 'FR2-IMGT', 'FR3-IMGT', 'CDR3-IMGT', 'JUNCTION', 'J-REGION', 'FR4-IMGT', 'P3V-nt nb', 'N1-REGION-nt nb', 'P5D-nt nb', 'P3D-nt nb', 'N2-REGION-nt nb', 'P5J-nt nb', '3V-REGION trimmed-nt nb', '5D-REGION trimmed-nt nb', '3D-REGION trimmed-nt nb', '5J-REGION trimmed-nt nb', u'Sample', u'Replicate']
+	if blast_files != 0:
+		print("Has a parsed blastn file, using limited columns.")
+		columns = [u'ID', u'VDJ Frame', u'Top V Gene', u'Top D Gene', u'Top J Gene', u'CDR1 Seq', u'CDR1 Length', u'CDR2 Seq', u'CDR2 Length', u'CDR3 Seq', u'CDR3 Length', u'CDR3 Seq DNA', u'CDR3 Length DNA', u'Strand', u'CDR3 Found How', u'Sample', u'Replicate']
+
+	frames = []
+	for patient_id, samples in patients.items():
+		for count, sample in enumerate(samples, 1):
+			sample['Sample'] = patient_id
+			sample['Replicate'] = str(count)
+			frames.append(sample[columns])
+	pd.concat(frames).to_csv(sys.argv[-1], sep="\t", index=False, index_label="index")
+
+if __name__ == "__main__":
+	main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/experimental_design.xml	Mon Jul 07 09:45:10 2014 -0400
@@ -0,0 +1,29 @@
+<tool id="experimentaldesign_igg" name="ExperimentalDesign" version="1.0">
+	<description> </description>
+	<command interpreter="python">
+		experimental_design.py
+		#for $i, $f in enumerate($patients)
+            "$f.id"
+            #for $j, $g in enumerate($f.samples)
+            	${g.sample}
+            #end for
+
+		#end for
+		--output $out_file
+	</command>
+	<inputs>
+		<repeat name="patients" title="Patient" min="1" default="1">
+            <repeat name="samples" title="Sample" min="1" default="1">
+                <param name="sample" format="tabular" type="data" label="Sample to Process" />
+            </repeat>
+			<param name="id" type="text" label="ID" />
+		</repeat>
+	</inputs>
+	<outputs>
+		<data format="tabular" name="out_file" />
+	</outputs>
+	<help>
+		Step 3 of the Immune Repertoire tools: merges the parsed reports generated in step 2 into one file with a Sample ID.
+	</help>
+
+</tool>
--- a/igblastmerge.py	Tue Mar 25 06:59:26 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,44 +0,0 @@
-import sys
-import pandas as pd
-
-def main():
-	patients = {}
-	files = []
-	sample_id = sys.argv[1]
-	imgt_files = 0
-	blast_files = 0
-	#organize files
-	for arg in sys.argv[2:-2]:
-		if arg.find("/") is -1:
-			patients[sample_id] = files
-			files = []
-			sample_id = arg
-		else:
-			df = pd.read_csv(arg, sep="\t")
-			if "Functionality" in list(df.columns.values):
-				df["VDJ Frame"][df["Functionality"] != "productive"] = "In-frame with stop codon"
-				imgt_files += 1
-			else:
-				blast_files += 1
-			files.append(df)
-	patients[sample_id] = files
-	columns = [u'ID', u'VDJ Frame', u'Top V Gene', u'Top D Gene', u'Top J Gene', u'CDR1 Seq', u'CDR1 Length', u'CDR2 Seq', u'CDR2 Length', u'CDR3 Seq', u'CDR3 Length', u'CDR3 Seq DNA', u'CDR3 Length DNA', u'Strand', u'CDR3 Found How', u'Functionality', 'V-REGION identity %', 'V-REGION identity nt', 'D-REGION reading frame', 'AA JUNCTION', 'Functionality comment', 'Sequence', 'FR1-IMGT', 'FR2-IMGT', 'FR3-IMGT', 'CDR3-IMGT', 'JUNCTION', 'J-REGION', 'FR4-IMGT', 'P3V-nt nb', 'N1-REGION-nt nb', 'P5D-nt nb', 'P3D-nt nb', 'N2-REGION-nt nb', 'P5J-nt nb', '3V-REGION trimmed-nt nb', '5D-REGION trimmed-nt nb', '3D-REGION trimmed-nt nb', '5J-REGION trimmed-nt nb', u'Sample', u'Replicate']
-	if blast_files is not 0:
-		print "Has a parsed blastn file, using limited columns."
-		columns = [u'ID', u'VDJ Frame', u'Top V Gene', u'Top D Gene', u'Top J Gene', u'CDR1 Seq', u'CDR1 Length', u'CDR2 Seq', u'CDR2 Length', u'CDR3 Seq', u'CDR3 Length', u'CDR3 Seq DNA', u'CDR3 Length DNA', u'Strand', u'CDR3 Found How', u'Sample', u'Replicate']
-
-	result = None
-	for patient_id, samples in patients.iteritems():
-		count = 1
-		for sample in samples:
-			sample['Sample'] = patient_id
-			sample['Replicate'] = str(count)
-			count += 1
-			if result is None:
-				result = sample[columns]
-			else:
-				result = result.append(sample[columns])
-	result.to_csv(sys.argv[-1], sep="\t", index=False, index_label="index")
-
-if __name__ == "__main__":
-	main()
--- a/igblastmerge.xml	Tue Mar 25 06:59:26 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,29 +0,0 @@
-<tool id="experimentaldesign_igg" name="ExperimentalDesign" version="1.0">
-	<description> </description>
-	<command interpreter="python">
-		igblastmerge.py
-		#for $i, $f in enumerate($patients)
-            "$f.id"
-            #for $j, $g in enumerate($f.samples)
-            	${g.sample}
-            #end for
-
-		#end for
-		--output $out_file
-	</command>
-	<inputs>
-		<repeat name="patients" title="Patient" min="1" default="1">
-            <repeat name="samples" title="Sample" min="1" default="1">
-                <param name="sample" format="tabular" type="data" label="Sample to Process" />
-            </repeat>
-			<param name="id" type="text" label="ID" />
-		</repeat>
-	</inputs>
-	<outputs>
-		<data format="tabular" name="out_file" />
-	</outputs>
-	<help>
-		Step 3 of the Immune Repertoire tools, merges the parsed reports generated in step 2 into one file with an Sample ID.
-	</help>
-
-</tool>