|
2
|
1 #!/usr/bin/env python
|
|
|
2 # Author: rachel.legendre@pasteur.fr
|
|
|
3
|
|
|
import argparse
import csv
import tempfile
from collections import Counter
from functools import reduce
from os import getcwd, system
from os.path import basename, join
from shutil import copyfile

import pandas as pd
|
|
|
12
|
|
|
def _merge_expected_counts(inputs):
    """Merge the count column of several RSEM tables into one data frame.

    ``inputs`` is an iterable of ``(filename, condition)`` pairs.  Each file
    is a tab-separated table; its first two columns and its fifth column are
    kept, and the column named "expected_count" is renamed to the condition
    label.  The per-file frames are then inner-joined on the ``gene_id`` and
    ``transcript_id(s)`` columns.

    Returns the merged ``pandas.DataFrame``.
    """
    frames = []
    for filename, cond in inputs:
        # Read both identifier columns as str so they are never coerced to numbers.
        table = pd.read_csv(filename, sep='\t', converters={0: str, 1: str})
        # Keep the two identifier columns plus the fifth column, which is
        # expected to be "expected_count" in an RSEM count table.
        table = table.iloc[:, [0, 1, 4]]
        # Name the count column after its condition so the columns of
        # different files stay distinct once merged.
        frames.append(table.rename(columns={"expected_count": cond}))
    # Fold the frames together pairwise, joining on the identifier columns.
    return reduce(
        lambda left, right: pd.merge(left, right, on=['gene_id', 'transcript_id(s)']),
        frames,
    )


def _write_ig_vector(gene_ids, path):
    """Write the IG vector: one integer per table row, one per line.

    For every entry of ``gene_ids`` (in order) the value written is the
    number of entries that carry the same gene id.
    """
    gene_order = list(gene_ids)
    counts = Counter(gene_order)
    # Text mode: str data cannot be written to a file opened in binary mode
    # on Python 3.
    with open(path, 'w') as handle:
        handle.writelines(str(counts[gene]) + '\n' for gene in gene_order)


def __main__():
    """Build an expression table and its IG vector from RSEM count tables.

    Command-line arguments:
        --inputs FILE CONDITION  repeatable; a count table and the label used
                                 as its column name in the merged table
        --outtable PATH          where the merged tab-separated table is written
        --outvector PATH         where the IG vector is written

    Exits through ``parser.error`` (status 2) when an argument is missing or
    an ``--inputs`` occurrence does not carry exactly two values.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--inputs', action='append', nargs='*')
    parser.add_argument('--outvector')
    parser.add_argument('--outtable')
    args = parser.parse_args()

    inputs = args.inputs
    # Fail with a usage message instead of an opaque traceback further down.
    if not inputs:
        parser.error("at least one --inputs FILE CONDITION pair is required")
    if any(len(pair) != 2 for pair in inputs):
        parser.error("each --inputs expects exactly two values: FILE CONDITION")
    if args.outtable is None or args.outvector is None:
        parser.error("both --outtable and --outvector are required")

    # Build the expression table and write it out without the index column.
    expression = _merge_expected_counts(inputs)
    expression.to_csv(args.outtable, index=False, sep="\t")

    # The IG vector is derived from the gene_id column of that same table;
    # the in-memory frame is used rather than re-reading the file just written.
    _write_ig_vector(expression['gene_id'], args.outvector)
|
|
|
52
|
|
|
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    __main__()
|