Mercurial > repos > rnateam > graphclust_postprocessing
annotate addCdhitseqs.py @ 2:6c88ad83de28 draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 287021573c592fdb70fdbbc88943aa16a8740fc0
author | rnateam |
---|---|
date | Fri, 13 Jan 2017 16:57:54 -0500 |
parents | |
children | dbcea781900e |
rev | line source |
---|---|
2
6c88ad83de28
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 287021573c592fdb70fdbbc88943aa16a8740fc0
rnateam
parents:
diff
changeset
|
"""Append CD-HIT redundant sequences to GraphClust cluster result files.

Usage: addCdhitseqs.py <cdhit.clstr>

The CD-HIT cluster file is parsed into a mapping from each cluster's
representative sequence id to the ids of the other sequences in that cluster.
For every file matching ``RESULTS/*.cluster.all`` that mentions a
representative id, one extra row per redundant sequence is appended. Each row
reuses columns 0, 1, 2 and 5 of the file's first line.
"""
import re
import glob
import sys

# Glob pattern (relative to the working directory) of the files to extend.
cluster_seqs_stats_path = "RESULTS/*.cluster.all"


def _clean_cdhit_id(token):
    """Return a CD-HIT id token with every ">" and "..." removed."""
    return token.replace(">", "").replace("...", "")


def parse_cdhit_clusters(lines):
    """Map each representative id to the other ids in its CD-HIT cluster.

    ``lines`` is any iterable of text lines (an open file works). A line
    starting with ``>Cluster`` opens a new cluster; the first usable line
    after it is taken as the representative and every following line, up to
    the next header, as a redundant member. The id is the third
    whitespace-separated token of a line. Lines with fewer than three tokens
    and lines before the first header are skipped rather than raising.
    Clusters with no redundant member do not appear in the result.
    """
    rep_to_members = {}
    rep_id = None
    members = None
    expect_representative = False

    for raw_line in lines:
        if raw_line.startswith(">Cluster"):
            expect_representative = True
            continue

        tokens = raw_line.split()
        if len(tokens) < 3:
            # Blank or truncated line: nothing to extract.
            continue
        seq_id = _clean_cdhit_id(tokens[2])

        if expect_representative:
            rep_id = seq_id
            members = []
            expect_representative = False
        elif rep_id is not None:
            members.append(seq_id)
            # Register the cluster only once it has a redundant member.
            rep_to_members[rep_id] = members

    return rep_to_members


def build_cdhit_rows(file_content, rep_to_members):
    """Return the text to append to one cluster file (may be empty).

    A representative matches when its id occurs anywhere in ``file_content``
    (substring test, as before). For each redundant id of a matching
    representative one row is produced:
    ``<c0> <c1> <c2>  -  CD-Hit <c5> ORIGID <id>`` where ``c0..c5`` are
    whitespace-separated columns of the first line of ``file_content``.

    Raises ValueError if a row is needed but the first line has fewer than
    six columns.
    """
    matched = [members for rep_id, members in rep_to_members.items()
               if rep_id in file_content]
    if not matched:
        return ""

    columns = file_content.split('\n')[0].split()
    if len(columns) < 6:
        raise ValueError(
            "first line of cluster file has fewer than 6 columns: %r"
            % file_content.split('\n')[0])

    row = "{0} {1} {2}  -  CD-Hit {3} ORIGID {4}\n"
    return "".join(
        row.format(columns[0], columns[1], columns[2], columns[5], seq_id)
        for members in matched
        for seq_id in members)


def main(argv=None):
    """Parse the CD-HIT file named in ``argv[1]`` and extend the result files."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit("usage: addCdhitseqs.py <cdhit.clstr>")

    # Read-only access is enough for the CD-HIT cluster file.
    with open(argv[1], 'r') as cdhit_file:
        rep_to_members = parse_cdhit_clusters(cdhit_file)

    for single_file in sorted(glob.glob(cluster_seqs_stats_path)):
        # Read and append with separate handles: an "a+" handle starts at
        # end-of-file on Python 3, so reading from it yields nothing.
        with open(single_file, 'r') as cl_file:
            file_content = cl_file.read()

        # Rows are built per file so one file's rows never leak into the next.
        rows = build_cdhit_rows(file_content, rep_to_members)
        if rows:
            with open(single_file, 'a') as cl_file:
                cl_file.write(rows)


if __name__ == "__main__":
    main()