annotate splitSHAPE.py @ 5:f4ad5dceb619 draft

planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust commit 9a3dc91fa984be18fabc0d968360634d787c9589
author rnateam
date Wed, 24 May 2017 09:56:11 -0400
parents
children e31c659be8bc
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
f4ad5dceb619 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust commit 9a3dc91fa984be18fabc0d968360634d787c9589
rnateam
parents:
diff changeset
import os
import re
import sys

# Split per-sequence SHAPE reactivities into the windows listed in
# FASTA/data.names and write them to shape_data_split.react.
#
# Usage: splitSHAPE.py <shape_file> <win_size>

NAME_FILE = "FASTA/data.names"
OUTPUT_FILE = "shape_data_split.react"


def parse_names(lines):
    """Parse the lines of the names file into window records.

    Each non-blank line is split on whitespace; field 0 is read as the
    integer window id, field 1 as the window string, and field 3 as the
    original sequence id with its last ``_<suffix>`` part removed.

    Blank lines are skipped, and the last line is honoured even when the
    file does not end with a newline.

    Returns a list of ``(seq_id, seq_string, orig_id)`` tuples in file order.
    Raises ``IndexError``/``ValueError`` on lines with too few fields or a
    non-integer first field.
    """
    records = []
    for raw in lines:
        fields = raw.split()
        if not fields:
            continue
        orig_id = fields[3].rsplit('_', 1)[0]
        records.append((int(fields[0]), fields[1], orig_id))
    return records


def parse_reactivities(lines):
    """Group the lines of a SHAPE file by their ``>`` header.

    A line starting with ``>`` opens a new record; its key is the line with
    every ``>`` removed and surrounding whitespace stripped.  All following
    non-blank lines are stored, unparsed, under that key.  Lines that appear
    before the first header are ignored.  A repeated header replaces the
    earlier record.

    Returns a dict mapping header key to a list of raw data lines.
    """
    react_dict = {}
    current = None
    for raw in lines:
        line = raw.rstrip('\r\n')
        if line.startswith('>'):
            current = []
            react_dict[line.replace('>', '').strip()] = current
        elif current is not None and line.strip():
            # Blank lines carry no reactivity and would shift the
            # position-based slicing done later, so they are dropped.
            current.append(line)
    return react_dict


def build_split_react(records, react_dict):
    """Render the windowed reactivity text for all records.

    For every ``(seq_id, seq_string, orig_id)`` record, a header line
    ``>seq_id seq_string`` is emitted, followed by the reactivity rows of
    ``orig_id`` that fall inside the window.  The window bounds are the
    second and third integers found in ``seq_string`` (presumably of the
    form ``SEQ<n>#<start>#<end>#<strand>`` -- confirm against the producer
    of the names file).  Row positions are renumbered to start at 1.

    Raises ``RuntimeError`` if ``orig_id`` has no reactivity record or the
    window bounds cannot be read from ``seq_string``.
    """
    parts = []
    for seq_id, seq_string, orig_id in records:
        if orig_id not in react_dict:
            # Report the offending key only, not the whole id list.
            raise RuntimeError('Error key {} not found'.format(orig_id))

        numbers = re.findall(r'\d+', seq_string)
        if len(numbers) < 3:
            raise RuntimeError(
                'Cannot read window bounds from {}'.format(seq_string))
        start = int(numbers[1])
        end = int(numbers[2])

        parts.append('>' + str(seq_id) + " " + seq_string + "\n")
        # Rows are selected by list position, i.e. row k of a record is
        # taken to describe position k (1-based).
        for row in react_dict[orig_id][start - 1:end]:
            fields = row.split()
            position = int(fields[0]) - start + 1
            parts.append(str(position) + '\t' + fields[1] + "\n")
    return "".join(parts)


def main(argv=None):
    """Command-line entry point.

    ``argv`` defaults to ``sys.argv[1:]`` and must hold the SHAPE file path
    and the window size.  Reads ``FASTA/data.names`` and the SHAPE file,
    then writes ``shape_data_split.react`` in the current directory.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        sys.exit("usage: splitSHAPE.py <shape_file> <win_size>")
    shape_file = args[0]
    # The window size is not used by the splitting itself; it is still
    # parsed so that an invalid value is rejected as before.
    int(args[1])

    with open(NAME_FILE, 'r') as handle:
        records = parse_names(handle)
    with open(shape_file, 'r') as handle:
        react_dict = parse_reactivities(handle)

    with open(OUTPUT_FILE, 'w') as out:
        out.write(build_split_react(records, react_dict))


if __name__ == "__main__":
    main()