pre_ebseq_eba: pre_EBSeq.py comparison

comparison pre_EBSeq.py @ 2:340ada00d965 draft

Uploaded

author	yboursin
date	Thu, 20 Oct 2016 08:47:06 -0400
parents
children

comparison

equal deleted inserted replaced

-:00231125f4fb
+:340ada00d965
+#!/usr/bin/env python
+# Author: rachel.legendre@pasteur.fr
+from os.path import basename, join
+from os import getcwd, system
+import argparse
+from shutil import copyfile
+import tempfile
+import csv
+import pandas as pd
+from collections import Counter
# Columns shared by every RSEM table; the per-condition tables are joined on them.
_MERGE_KEYS = ['gene_id', 'transcript_id(s)']


def _load_counts(filename, cond):
    """Read one RSEM count table and return its id columns plus the counts.

    The "expected_count" column is renamed to *cond* so that tables from
    different conditions do not collide when they are merged.
    """
    # Force the first two columns to str so identifiers are never parsed
    # as numbers.
    table = pd.read_csv(filename, sep='\t', converters={0: str, 1: str})
    # Keep columns 0, 1 and 4 by position. The rename below and the merge
    # keys assume these are gene_id, transcript_id(s) and expected_count.
    table = table.iloc[:, [0, 1, 4]]
    return table.rename(columns={"expected_count": cond})


def _merge_counts(frames):
    """Join the per-condition tables side by side on the shared id columns."""
    merged = frames[0]
    for frame in frames[1:]:
        merged = pd.merge(merged, frame, on=_MERGE_KEYS)
    return merged


def _write_ig_vector(gene_ids, path):
    """Write, for each row, how many rows share that row's gene_id."""
    counts = Counter(gene_ids)
    # Text mode: the values are str, so binary mode fails on Python 3.
    with open(path, 'w') as handle:
        handle.writelines("%d\n" % counts[gene] for gene in gene_ids)


def __main__(argv=None):
    """Build the expression table and the IG vector from RSEM count tables.

    Command-line options:
      --inputs FILE COND  one RSEM count table and its condition name;
                          repeat the option once per table.
      --outtable PATH     tab-separated expression table to write (the id
                          columns followed by one count column per input).
      --outvector PATH    IG vector to write: one integer per table row,
                          the number of rows sharing that row's gene_id.

    argv -- optional list of arguments to parse instead of sys.argv[1:].

    Exits through argparse (status 2) when an option is missing or an
    --inputs group does not hold exactly two values.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--inputs', action='append', nargs='*', required=True)
    parser.add_argument('--outvector', required=True)
    parser.add_argument('--outtable', required=True)
    args = parser.parse_args(argv)

    # nargs='*' accepts any number of values; fail early with a usage
    # message rather than an unpacking error further down.
    for group in args.inputs:
        if len(group) != 2:
            parser.error(
                "each --inputs needs exactly two values: FILE CONDITION "
                "(got %d)" % len(group))

    frames = [_load_counts(filename, cond) for filename, cond in args.inputs]
    expression = _merge_counts(frames)
    expression.to_csv(args.outtable, index=False, sep="\t")

    # The gene ids are already str in memory, so the table does not need
    # to be read back from disk to build the IG vector.
    _write_ig_vector(list(expression['gene_id']), args.outvector)


if __name__ == "__main__":
    __main__()

Mercurial > repos > yboursin > pre_ebseq_eba

comparison pre_EBSeq.py @ 2:340ada00d965 draft