annotate scripts/functions.py @ 10:f62c76aab669 draft default tip

planemo upload for repository https://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
author lecorguille
date Mon, 24 Sep 2018 04:34:39 -0400
parents f1e24200e5ae
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
1 #!/usr/bin/env python
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
2 #coding: utf-8
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
3
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
4 import itertools, os
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
5
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
6 def dico(fasta_file, path_in):
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
7 """
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
8 Stores a fasta file in a dictionary : key/value -> header/sequence
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
9
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
10 Args:
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
11 - fasta_file (String) : the name of fasta file
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
12 - path_in (String) : path to the fasta file
7
f1e24200e5ae planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit b7a3030ea134b5dfad89b1a869db659d72d1145c
abims-sbr
parents: 2
diff changeset
13
10
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
14 Return:
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
15 - bash1 (dict) : the dictionary header/sequence
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
16 """
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
17 bash1 = {}
2
988467f963f0 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
diff changeset
18
10
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
19 with open(path_in+'/'+fasta_file, 'r') as F1:
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
20 for h,s in itertools.izip_longest(*[F1]*2):
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
21 fasta_name = h[1:3]
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
22 sequence = s[:-1]
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
23 if fasta_name not in bash1.keys():
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
24 bash1[fasta_name] = sequence
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
25 else:
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
26 print fasta_name
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
27
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
28 return bash1 # same length for all (alignment)
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
29
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
def write_output(names, sps_list, out_dir, results_dict):
    """ Write results in csv files. There is one file per counted element (one file per amino-acid, one file per indice ...)

    Each file is named '<name>.csv' and is created directly inside out_dir
    (the directory is created when it does not exist yet). The first line is
    'Group,' followed by the species header; then comes one line per key of
    results_dict, holding the values results_dict[group][species][name] with
    species taken in sorted order.

    Args:
        - names (list) : list with the names of elems
        - sps_list (String or list) : species names, sorted alphabetically.
          A string is written as is minus its last character (the form the
          code has always handled -- presumably a comma-terminated list,
          verify against the caller); a list/tuple is joined with commas.
        - out_dir (String) : output directory
        - results_dict (dict) : vcounts values of each element for each input file (keys names : elems from 'names argument')

    """
    if isinstance(sps_list, (list, tuple)):
        header = ','.join(str(species) for species in sps_list)
    else:
        header = sps_list[0:-1]

    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    for name in names:
        # Write straight to the destination: no shell 'mv', hence no trouble
        # with spaces or special characters in names, and no ignored failure.
        with open(os.path.join(out_dir, name + ".csv"), 'w') as out:
            out.write('Group,' + header + '\n')
            for group in results_dict:
                per_species = results_dict[group]
                count_of_elems = ','.join(
                    str(per_species[specs][name]) for specs in sorted(per_species)
                )
                out.write(group + ',' + count_of_elems + '\n')
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
50
f62c76aab669 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
lecorguille
parents: 7
diff changeset
def fill_with_NaN(what):
    """ Used to create a dict only with NaN values ; used when a species is not present in an orthogroup

    Args:
        - what (list of Strings) : the names of the elements studied (nucleotide, amino-acids, indices of thermostability ...)

    Return:
        - NaN_values (dict) : dictionary with keys=elems of what, values=NaN
          (the value is the string 'NaN', not a float)
    """
    # Every key maps to the same immutable string, so dict.fromkeys is a
    # direct replacement for the explicit loop.
    return dict.fromkeys(what, 'NaN')