Mercurial > repos > galaxyp > unipept
annotate unipept.py @ 1:b65ee881ca64 draft
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95
author | galaxyp |
---|---|
date | Tue, 22 Jan 2019 20:58:28 -0500 |
parents | b33376bf2290 |
children | dca8a1fe0bf3 |
rev | line source |
---|---|
0
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
1 #!/usr/bin/env python |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
2 """ |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
3 # |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
4 #------------------------------------------------------------------------------ |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
5 # University of Minnesota |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
6 # Copyright 2015, Regents of the University of Minnesota |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
7 #------------------------------------------------------------------------------ |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
8 # Author: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
9 # |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
10 # James E Johnson |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
11 # |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
12 #------------------------------------------------------------------------------ |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
13 """ |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
14 |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
15 import json |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
16 import logging |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
17 import optparse |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
18 from optparse import OptionParser |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
19 import os |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
20 import sys |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
21 import re |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
22 import urllib |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
23 import urllib2 |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
24 |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
25 """ |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
26 pept2taxa json |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
27 pept2lca json |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
28 pept2prot |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
29 pept2ec ecjson ec |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
30 pept2go go |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
31 pept2funct go ec |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
32 peptinfo json ecjson ec go |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
33 |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
34 """ |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
35 |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
36 try: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
37 import xml.etree.cElementTree as ET |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
38 except ImportError: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
39 import xml.etree.ElementTree as ET |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
40 |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
def warn_err(msg, exit_code=1):
    """Write a message to stderr and optionally terminate the program.

    msg       -- text written verbatim to sys.stderr (no newline is added).
    exit_code -- when truthy, the process exits with this status via
                 sys.exit(); when falsy (None or 0) the function just
                 returns, so the message acts as a warning only.
    """
    sys.stderr.write(msg)
    if not exit_code:
        # Warning-only mode: report and carry on.
        return
    sys.exit(exit_code)
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
45 |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
# Names of the GO categories used by this script
# (presumably the three Gene Ontology namespaces).
go_types = [
    'biological process',
    'molecular function',
    'cellular component',
]

# Display names keyed by dotted EC number prefix: single-level keys
# ('1' .. '6') name the top-level class, two-level keys ('1.1', ...)
# name a subclass within it.
ec_name_dict = {
    # EC 1
    '1': 'Oxidoreductase',
    '1.1': 'act on the CH-OH group of donors',
    '1.2': 'act on the aldehyde or oxo group of donors',
    '1.3': 'act on the CH-CH group of donors',
    '1.4': 'act on the CH-NH2 group of donors',
    '1.5': 'act on CH-NH group of donors',
    '1.6': 'act on NADH or NADPH',
    '1.7': 'act on other nitrogenous compounds as donors',
    '1.8': 'act on a sulfur group of donors',
    '1.9': 'act on a heme group of donors',
    '1.10': 'act on diphenols and related substances as donors',
    '1.11': 'act on peroxide as an acceptor -- peroxidases',
    '1.12': 'act on hydrogen as a donor',
    '1.13': 'act on single donors with incorporation of molecular oxygen',
    '1.14': 'act on paired donors with incorporation of molecular oxygen',
    '1.15': 'act on superoxide radicals as acceptors',
    '1.16': 'oxidize metal ions',
    '1.17': 'act on CH or CH2 groups',
    '1.18': 'act on iron-sulfur proteins as donors',
    '1.19': 'act on reduced flavodoxin as donor',
    '1.20': 'act on phosphorus or arsenic as donors',
    '1.21': 'act on X-H and Y-H to form an X-Y bond',
    '1.97': 'other oxidoreductases',
    # EC 2
    '2': 'Transferase',
    '2.1': 'transfer one-carbon groups, Methylase',
    '2.2': 'transfer aldehyde or ketone groups',
    '2.3': 'acyltransferases',
    '2.4': 'glycosyltransferases',
    '2.5': 'transfer alkyl or aryl groups, other than methyl groups',
    '2.6': 'transfer nitrogenous groups',
    '2.7': 'transfer phosphorus-containing groups',
    '2.8': 'transfer sulfur-containing groups',
    '2.9': 'transfer selenium-containing groups',
    # EC 3
    '3': 'Hydrolase',
    '3.1': 'act on ester bonds',
    '3.2': 'act on sugars - glycosylases',
    '3.3': 'act on ether bonds',
    '3.4': 'act on peptide bonds - Peptidase',
    '3.5': 'act on carbon-nitrogen bonds, other than peptide bonds',
    '3.6': 'act on acid anhydrides',
    '3.7': 'act on carbon-carbon bonds',
    '3.8': 'act on halide bonds',
    '3.9': 'act on phosphorus-nitrogen bonds',
    '3.10': 'act on sulfur-nitrogen bonds',
    '3.11': 'act on carbon-phosphorus bonds',
    '3.12': 'act on sulfur-sulfur bonds',
    '3.13': 'act on carbon-sulfur bonds',
    # EC 4
    '4': 'Lyase',
    '4.1': 'carbon-carbon lyases',
    '4.2': 'carbon-oxygen lyases',
    '4.3': 'carbon-nitrogen lyases',
    '4.4': 'carbon-sulfur lyases',
    '4.5': 'carbon-halide lyases',
    '4.6': 'phosphorus-oxygen lyases',
    # EC 5
    '5': 'Isomerase',
    '5.1': 'racemases and epimerases',
    '5.2': 'cis-trans-isomerases',
    '5.3': 'intramolecular oxidoreductases',
    '5.4': 'intramolecular transferases -- mutases',
    '5.5': 'intramolecular lyases',
    '5.99': 'other isomerases',
    # EC 6
    '6': 'Ligase',
    '6.1': 'form carbon-oxygen bonds',
    '6.2': 'form carbon-sulfur bonds',
    '6.3': 'form carbon-nitrogen bonds',
    '6.4': 'form carbon-carbon bonds',
    '6.5': 'form phosphoric ester bonds',
    '6.6': 'form nitrogen-metal bonds',
}
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
# Column layouts for the tabular outputs, one set per Unipept API method.

# pept2lca: basic columns, the per-rank lineage columns, and both combined.
pept2lca_column_order = ['peptide', 'taxon_rank', 'taxon_id', 'taxon_name']
pept2lca_extra_column_order = [
    'peptide',
    'superkingdom', 'kingdom', 'subkingdom',
    'superphylum', 'phylum', 'subphylum',
    'superclass', 'class', 'subclass', 'infraclass',
    'superorder', 'order', 'suborder', 'infraorder', 'parvorder',
    'superfamily', 'family', 'subfamily',
    'tribe', 'subtribe',
    'genus', 'subgenus',
    'species_group', 'species_subgroup', 'species', 'subspecies',
    'varietas', 'forma',
]
# Drop the leading 'peptide' of the extra list so it is not repeated.
pept2lca_all_column_order = pept2lca_column_order + pept2lca_extra_column_order[1:]

# pept2prot: basic columns, then the same columns plus the extra fields.
pept2prot_column_order = ['peptide', 'uniprot_id', 'taxon_id']
pept2prot_extra_column_order = pept2prot_column_order + [
    'taxon_name',
    'ec_references',
    'go_references',
    'refseq_ids',
    'refseq_protein_ids',
    'insdc_ids',
    'insdc_protein_ids',
]

# pept2ec / pept2go: a pair of column groups, [peptide-level, per-term].
pept2ec_column_order = [
    ['peptide', 'total_protein_count'],
    ['ec_number', 'protein_count'],
]
pept2ec_extra_column_order = [
    ['peptide', 'total_protein_count'],
    ['ec_number', 'protein_count', 'name'],
]
pept2go_column_order = [
    ['peptide', 'total_protein_count'],
    ['go_term', 'protein_count'],
]
pept2go_extra_column_order = [
    ['peptide', 'total_protein_count'],
    ['go_term', 'protein_count', 'name'],
]

# pept2funct: a single flat list of columns.
pept2funct_column_order = ['peptide', 'total_protein_count', 'ec', 'go']
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
127 |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
128 def __main__(): |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
129 version = '2.0' |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
130 pep_pat = '^([ABCDEFGHIKLMNPQRSTVWXYZ]+)$' |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
131 |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
def read_tabular(filepath, col):
    """Collect the peptide found in one column of a tab-separated file.

    filepath -- path of the tabular file to read.
    col      -- 0-based index of the field that holds the peptide.

    Blank lines and lines starting with '#' are skipped.  Every other
    line is split on tabs and field `col` is checked against `pep_pat`.
    A value that does not match is reported through warn_err() using
    `invalid_ec` as the exit code; if warn_err() returns, the value is
    still kept in the result.  A line with too few fields is reported
    the same way and then skipped.

    Returns the list of peptides in file order.

    NOTE(review): `pep_pat` and `invalid_ec` come from the enclosing
    scope; `invalid_ec` is defined outside the visible part of the file
    -- presumably it is falsy when invalid input should only be warned
    about.  Confirm against the caller.
    """
    peptides = []
    with open(filepath) as fp:
        # Number lines from 1 so the diagnostic matches what an editor shows.
        for line_num, line in enumerate(fp, 1):
            if line.strip() == '' or line.startswith('#'):
                continue
            # Strip '\r' as well: on CRLF files a trailing '\r' would stay
            # attached to the last field and make a valid peptide fail the
            # '$'-anchored pattern.
            fields = line.rstrip('\r\n').split('\t')
            try:
                peptide = fields[col]
            except IndexError:
                # Short row: report it like any other invalid input instead
                # of dying with a traceback.
                warn_err('line %d of tabular file: %s has no column %d\n' % (line_num, filepath, col + 1), exit_code=invalid_ec)
                continue
            if not re.match(pep_pat, peptide):
                # Column is reported 1-based to agree with the line number.
                warn_err('"%s" is not a peptide (line %d column %d of tabular file: %s)\n' % (peptide, line_num, col + 1, filepath), exit_code=invalid_ec)
            peptides.append(peptide)
    return peptides
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
144 |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
def get_fasta_entries(fp):
    """Yield (header, sequence) pairs from an iterable of FASTA lines.

    fp -- any iterable of text lines, e.g. an open file.

    A line starting with '>' opens a new entry; the header is yielded as
    the whole right-stripped line, '>' included.  All following lines up
    to the next header are right-stripped and concatenated to form the
    sequence.  Lines that appear before the first header are dropped.
    """
    header = None
    chunks = []
    for raw in fp:
        text = raw.rstrip()
        if not text.startswith(">"):
            chunks.append(text)
            continue
        # New header: emit the entry collected so far, if there is one.
        if header:
            yield (header, ''.join(chunks))
        header = text
        chunks = []
    # Emit the final entry, which has no following header to trigger it.
    if header:
        yield (header, ''.join(chunks))
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
155 |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
def read_fasta(filepath):
    """
    Return the list of sequences found in the FASTA file at filepath.

    Every sequence is checked against pep_pat; one that does not match is
    reported through warn_err with exit_code=invalid_ec.  The sequence is
    appended to the result whether or not it matched (whether execution
    continues after warn_err depends on warn_err itself, which is defined
    elsewhere in this file).
    """
    peptides = []
    with open(filepath) as handle:
        for header, sequence in get_fasta_entries(handle):
            if re.match(pep_pat, sequence) is None:
                message = '"%s" is not a peptide (id %s of fasta file: %s)\n' % (sequence, header, filepath)
                warn_err(message, exit_code=invalid_ec)
            peptides.append(sequence)
    return peptides
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
164 |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
def read_mzid(fp):
    """
    Collect the text of every element whose tag contains 'PeptideSequence'.

    fp is handed to ET.iterparse as-is.  Elements are visited as their end
    tag is parsed, so the result follows document order of the closing
    tags.  An element without text contributes None.
    """
    return [
        elem.text
        for event, elem in ET.iterparse(fp)
        if event == 'end' and 'PeptideSequence' in elem.tag
    ]
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
172 |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
def read_pepxml(fp):
    """
    Collect the 'peptide' attribute of every element whose tag contains
    'search_hit'.

    fp is handed to ET.iterparse as-is.  Elements are visited as their end
    tag is parsed.  A matching element without a 'peptide' attribute
    contributes None.
    """
    return [
        elem.get('peptide')
        for event, elem in ET.iterparse(fp)
        if event == 'end' and 'search_hit' in elem.tag
    ]
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
180 |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
def best_match(peptide,matches):
    """
    Pick one dict out of matches and return a shallow copy of it.

    The peptide argument is not used by the selection.

    - no matches            -> None
    - exactly one match     -> that match
    - first match has no 'taxon_rank' key -> the match with the longest
      'peptide' value (on a tie, the one latest in matches)
    - otherwise             -> walk the ranks of pept2lca_extra_column_order
      from last to second (the first entry is the peptide column) and
      return the first match whose 'taxon_rank' equals the rank or that
      has a truthy value under the rank's key; None if nothing qualifies
    """
    if not matches:
        return None
    if len(matches) == 1:
        return matches[0].copy()
    if 'taxon_rank' not in matches[0]:
        # stable sort, so among equally long peptides the last one wins
        by_length = sorted(matches, key=lambda m: len(m['peptide']))
        return by_length[-1].copy()
    # find the most specific match (peptide is always the first column order field)
    for rank in reversed(pept2lca_extra_column_order[1:]):
        rank_key = rank+"_id" if options.extra else rank
        for candidate in matches:
            has_rank = 'taxon_rank' in candidate and candidate['taxon_rank'] == rank
            if has_rank or (rank_key in candidate and candidate[rank_key]):
                return candidate.copy()
    return None
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
198 |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
def get_taxon_json(resp):
    """
    Build a nested taxonomy tree from a sequence of peptide result dicts.

    resp is iterated twice (once to discover keys, once to build the tree),
    so it has to be re-iterable.  Each node is a dict with 'id', 'name',
    'children' (child node dicts), 'kids' (ids of those children) and
    'data' ('count', 'self_count', 'valid_taxon', 'rank', 'sequences').
    The node with id 1 named 'root' is returned.
    """
    found_keys = set()
    for pdict in resp:
        found_keys.update(pdict.keys())
    # ranks present in the response, last column first, skipping column 0
    taxa_cols = [col for col in pept2lca_extra_column_order[-1:0:-1]
                 if col+'_id' in found_keys]
    id_to_node = {}

    def get_node(node_id,name,rank,child,seq):
        # fetch the node registered under node_id, creating it on first use
        node = id_to_node.get(node_id)
        if node is None:
            stats = {'count' : 0, 'self_count' : 0, 'valid_taxon' : 1, 'rank' : rank, 'sequences' : [] }
            node = {'id' : node_id, 'name' : name, 'children' : [], 'kids': [],'data' : stats }
            id_to_node[node_id] = node
        stats = node['data']
        stats['count'] += 1
        if seq is not None and seq not in stats['sequences']:
            stats['sequences'].append(seq)
        if child is None:
            # reached without a child: the hit belongs to this node itself
            stats['self_count'] += 1
        elif child['id'] not in node['kids']:
            node['kids'].append(child['id'])
            node['children'].append(child)
        return node

    root = get_node(1,'root','no rank',None,None)
    for pdict in resp:
        # the sequence is recorded only on the first node created for this peptide
        pending_seq = pdict.get('peptide',pdict.get('tryptic_peptide',None))
        lineage = None
        for col in taxa_cols:
            taxon_id = pdict.get(col+'_id')
            if not taxon_id:
                continue
            name_key = col if col in found_keys else col+'_name'
            lineage = get_node(taxon_id,pdict.get(name_key,''),col,lineage,pending_seq)
            pending_seq = None
        if lineage is not None:
            get_node(1,'root','no rank',lineage,None)
    return root
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
238 |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
def get_ec_json(resp):
    """
    Build a nested EC-number tree from an iterable of peptide result dicts.

    Each dict may carry an 'ec' list whose items have an 'ec_number' string.
    Every dot-separated prefix of an EC number becomes a node, e.g.
    '1.2.3.4' contributes the chain '1' > '1.2' > '1.2.3' > '1.2.3.4'.
    The six top-level classes '1'..'6' are always created, named with
    ec_name_dict, and attached to the root.

    Each node is a dict with 'id', 'name', 'children' (child node dicts),
    'kids' (ids of those children) and 'data' ('count', 'self_count',
    'sequences').  The root node (id 0, name '-.-.-.-') is returned.

    resp is traversed exactly once.
    """
    def get_ids(ec):
        # '1.2.3.4' -> ['1.2.3.4', '1.2.3', '1.2', '1'] (most specific first)
        ids = []
        i = len(ec)
        while i >= 0:
            ids.append(ec[:i])
            if i == 0:
                # Nothing left to strip.  Without this stop, rfind would be
                # called with end=-1, which counts from the end of the
                # string and finds a leading '.' at index 0 again, forever.
                break
            i = ec.rfind('.',0,i - 1)
        return ids

    id_to_node = dict()

    def get_node(id,name,child,seq):
        # fetch the node registered under id, creating it on first use
        if id not in id_to_node:
            data = {'count' : 0, 'self_count' : 0, 'sequences' : [] }
            node = {'id' : id, 'name' : name, 'children' : [], 'kids': [],'data' : data }
            id_to_node[id] = node
        else:
            node = id_to_node[id]
        node['data']['count'] += 1
        if seq is not None and seq not in node['data']['sequences']:
            node['data']['sequences'].append(seq)
        if child is None:
            # reached without a child: the hit belongs to this node itself
            node['data']['self_count'] += 1
        elif child['id'] not in node['kids']:
            node['kids'].append(child['id'])
            node['children'].append(child)
        return node

    root = get_node(0,'-.-.-.-',None,None)
    # always show the six top-level enzyme classes, even when they have no hits
    for i in range(1,7):
        child = get_node(str(i),'%s\n%s' %(str(i), ec_name_dict[str(i)] ),None,None)
        get_node(0,'-.-.-.-',child,None)
    for pdict in resp:
        # NOTE: seq is cleared after the first node it is attached to and is
        # not restored for later 'ec' items, so a peptide's sequence ends up
        # only on the most specific node of its first EC number.
        seq = pdict.get('peptide',pdict.get('tryptic_peptide',None))
        if 'ec' in pdict:
            for ec in pdict['ec']:
                child = None
                for ec_id in get_ids(ec['ec_number']):
                    child = get_node(ec_id,str(ec_id),child,seq)
                    seq = None
                if child:
                    get_node(0,'-.-.-.-',child,None)
    return root
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
293 |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
294 def get_taxon_dict(resp, column_order, extra=False, names=False): |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
295 found_keys = set() |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
296 results = [] |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
297 for i,pdict in enumerate(resp): |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
298 results.append(pdict) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
299 found_keys |= set(pdict.keys()) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
300 # print >> sys.stderr, "%s\n%s" % (pdict.keys(),found_keys) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
301 column_names = [] |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
302 column_keys = [] |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
303 for col in column_order: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
304 if col in found_keys: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
305 column_names.append(col) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
306 column_keys.append(col) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
307 elif names: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
308 col_id = col+'_id' |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
309 col_name = col+'_name' |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
310 if extra: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
311 if col_id in found_keys: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
312 column_names.append(col_id) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
313 column_keys.append(col_id) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
314 if names: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
315 if col_name in found_keys: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
316 column_names.append(col) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
317 column_keys.append(col_name) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
318 else: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
319 if col+'_name' in found_keys: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
320 column_names.append(col) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
321 column_keys.append(col+'_name') |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
322 elif col+'_id' in found_keys: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
323 column_names.append(col) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
324 column_keys.append(col+'_id') |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
325 # print >> sys.stderr, "%s\n%s" % (column_names,column_keys) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
326 taxa = dict() ## peptide : [taxonomy] |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
327 for i,pdict in enumerate(results): |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
328 peptide = pdict['peptide'] if 'peptide' in pdict else None |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
329 if peptide and peptide not in taxa: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
330 vals = [str(pdict[x]) if x in pdict and pdict[x] else '' for x in column_keys] |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
331 taxa[peptide] = vals |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
332 return (taxa,column_names) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
333 |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
def get_ec_dict(resp, extra=False):
    """Collapse per-peptide EC annotations into comma-joined column values.

    Parameters:
        resp: iterable of dicts. Each dict is expected to carry a 'peptide'
            key and may carry an 'ec' list whose items hold 'ec_number',
            'protein_count' and, optionally, 'name'.
        extra: when true, a third column with the EC names is produced.

    Returns:
        (ec_dict, ec_cols) where ec_dict maps each peptide to a list of
        strings aligned with ec_cols: the EC numbers, the protein counts and
        (only with extra) the EC names, each joined with ','.  A peptide that
        occurs more than once keeps the values of its last occurrence.
    """
    ec_cols = ['ec_numbers', 'ec_protein_counts']
    if extra:
        ec_cols.append('ec_names')
    ec_dict = dict()
    for pdict in resp:
        peptide = pdict.get('peptide')
        if not peptide:
            # An entry without a peptide cannot be keyed; skip it instead of
            # raising KeyError (the taxonomy dict builder earlier in this
            # file applies the same guard).
            continue
        ec_numbers = []
        protein_counts = []
        ec_names = []
        # A missing or null 'ec' value is treated as "no annotations".
        for ec in pdict.get('ec') or []:
            ec_numbers.append(ec['ec_number'])
            protein_counts.append(str(ec['protein_count']))
            if extra:
                # A null name would make ','.join() raise TypeError, so it
                # is rendered as an empty string like a missing name.
                ec_names.append(ec.get('name') or '')
        vals = [','.join(ec_numbers), ','.join(protein_counts)]
        if extra:
            vals.append(','.join(ec_names))
        ec_dict[peptide] = vals
    return (ec_dict, ec_cols)
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
355 |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
def get_go_dict(resp, extra=False):
    """Collapse per-peptide GO annotations into comma-joined column values.

    Each item of a peptide's 'go' list is either a flat annotation (it has a
    'go_term' key) or a grouping dict keyed by the names in the module-level
    ``go_types``, each key holding a list of flat annotations.  Both shapes
    are flattened in order into the same three lists.

    Parameters:
        resp: iterable of dicts. Each dict is expected to carry a 'peptide'
            key and may carry a 'go' list as described above; annotations
            hold 'go_term', 'protein_count' and, optionally, 'name'.
        extra: when true, a third column with the GO names is produced.

    Returns:
        (go_dict, go_cols) where go_dict maps each peptide to a list of
        strings aligned with go_cols: the GO terms, the protein counts and
        (only with extra) the GO names, each joined with ','.  A peptide that
        occurs more than once keeps the values of its last occurrence.
    """
    go_cols = ['go_terms', 'go_protein_counts']
    if extra:
        go_cols.append('go_names')
    go_dict = dict()
    for pdict in resp:
        peptide = pdict.get('peptide')
        if not peptide:
            # An entry without a peptide cannot be keyed; skip it instead of
            # raising KeyError (the taxonomy dict builder earlier in this
            # file applies the same guard).
            continue
        go_terms = []
        protein_counts = []
        go_names = []
        # A missing or null 'go' value is treated as "no annotations".
        for go in pdict.get('go') or []:
            if 'go_term' in go:
                annotations = [go]
            else:
                # Grouped shape: gather the annotations of every known
                # group, in go_types order.
                annotations = []
                for go_type in go_types:
                    if go_type in go:
                        annotations.extend(go[go_type] or [])
            for annotation in annotations:
                go_terms.append(annotation['go_term'])
                protein_counts.append(str(annotation['protein_count']))
                if extra:
                    # A null name would make ','.join() raise TypeError, so
                    # it is rendered as an empty string like a missing name.
                    go_names.append(annotation.get('name') or '')
        vals = [','.join(go_terms), ','.join(protein_counts)]
        if extra:
            vals.append(','.join(go_names))
        go_dict[peptide] = vals
    return (go_dict, go_cols)
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
386 |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
def write_ec_table(outfile, resp, column_order):
    """Write one tab-separated line per (peptide, EC annotation) pair.

    Parameters:
        outfile: path of the file to create or overwrite.
        resp: iterable of dicts; only entries with a non-empty 'ec' list
            produce output.
        column_order: sequence of key lists.  column_order[0] names the keys
            read from the peptide entry, column_order[-1] the keys read from
            each EC annotation.

    Every line holds the peptide-level values followed by the values of one
    EC annotation.  A key that is absent or null yields an empty field; any
    other value, including 0, is written via str().
    """
    def cell(record, key):
        # Only missing/null values are blanked, so a legitimate 0 is kept.
        value = record.get(key)
        return '' if value is None else str(value)

    with open(outfile, 'w') as fh:
        for pdict in resp:
            ec_entries = pdict.get('ec')
            if not ec_entries:
                # Absent, null or empty 'ec': nothing to write.
                continue
            tvals = [cell(pdict, x) for x in column_order[0]]
            for ec in ec_entries:
                vals = [cell(ec, x) for x in column_order[-1]]
                fh.write('%s\n' % '\t'.join(tvals + vals))
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
395 |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
def write_go_table(outfile, resp, column_order):
    """Write one tab-separated line per (peptide, GO annotation) pair.

    Parameters:
        outfile: path of the file to create or overwrite.
        resp: iterable of dicts; only entries with a non-empty 'go' list
            produce output.
        column_order: sequence of key lists.  column_order[0] names the keys
            read from the peptide entry, column_order[-1] the keys read from
            each GO annotation.

    Each item of a 'go' list is either a flat annotation (it has a 'go_term'
    key) or a grouping dict keyed by the names in the module-level
    ``go_types``.  Lines produced from a grouping dict carry one extra
    trailing field holding the group name.  A key that is absent or null
    yields an empty field; any other value, including 0, is written via
    str().
    """
    def cell(record, key):
        # Only missing/null values are blanked, so a legitimate 0 is kept.
        value = record.get(key)
        return '' if value is None else str(value)

    with open(outfile, 'w') as fh:
        for pdict in resp:
            go_entries = pdict.get('go')
            if not go_entries:
                # Absent, null or empty 'go': nothing to write.
                continue
            tvals = [cell(pdict, x) for x in column_order[0]]
            for go in go_entries:
                if 'go_term' in go:
                    vals = [cell(go, x) for x in column_order[-1]]
                    fh.write('%s\n' % '\t'.join(tvals + vals))
                    continue
                # Grouped shape: emit the annotations of every known group,
                # in go_types order, tagging each line with its group.
                for go_type in go_types:
                    if go_type not in go:
                        continue
                    for _go in go[go_type] or []:
                        vals = [cell(_go, x) for x in column_order[-1]]
                        vals.append(go_type)
                        fh.write('%s\n' % '\t'.join(tvals + vals))
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
412 |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
# Parse Command Line
# Builds the option parser only; help-text typos are corrected here, while
# every flag, dest, type, default and choice list is unchanged.
parser = optparse.OptionParser()
# unipept API choice
parser.add_option('-a', '--api', dest='unipept', default='pept2lca',
                  choices=['pept2lca', 'pept2taxa', 'pept2prot', 'pept2ec', 'pept2go', 'pept2funct', 'peptinfo'],
                  help='The unipept application: pept2lca, pept2taxa, pept2prot, pept2ec, pept2go, pept2funct, or peptinfo')
# input files
parser.add_option('-t', '--tabular', dest='tabular', default=None,
                  help='A tabular file that contains a peptide column')
parser.add_option('-c', '--column', dest='column', type='int', default=0,
                  help='The column (zero-based) in the tabular file that contains peptide sequences')
parser.add_option('-f', '--fasta', dest='fasta', default=None,
                  help='A fasta file containing peptide sequences')
parser.add_option('-m', '--mzid', dest='mzid', default=None,
                  help='A mzIdentML file containing peptide sequences')
parser.add_option('-p', '--pepxml', dest='pepxml', default=None,
                  help='A pepxml file containing peptide sequences')
# Unipept Flags
parser.add_option('-e', '--equate_il', dest='equate_il', action='store_true', default=False,
                  help='isoleucine (I) and leucine (L) are equated when matching tryptic peptides to UniProt records')
parser.add_option('-x', '--extra', dest='extra', action='store_true', default=False,
                  help='return the complete lineage of the taxonomic lowest common ancestor')
parser.add_option('-n', '--names', dest='names', action='store_true', default=False,
                  help='return the names of all ranks in the lineage of the taxonomic lowest common ancestor')
parser.add_option('-D', '--domains', dest='domains', action='store_true', default=False,
                  help='group response by GO namespace: biological process, molecular function, cellular component')
parser.add_option('-M', '--max_request', dest='max_request', type='int', default=200,
                  help='The maximum number of entries per unipept request')

# output fields
parser.add_option('-A', '--allfields', dest='allfields', action='store_true', default=False,
                  help='include fields: taxon_rank,taxon_id,taxon_name in csv and tsv outputs')
# Warn vs Error Flag
parser.add_option('-S', '--strict', dest='strict', action='store_true', default=False,
                  help='Print exit on invalid peptide')
# output files
parser.add_option('-J', '--json', dest='json', default=None,
                  help='Output file path for json formatted results')
parser.add_option('-j', '--ec_json', dest='ec_json', default=None,
                  help='Output file path for json formatted results')
parser.add_option('-E', '--ec_tsv', dest='ec_tsv', default=None,
                  help='Output file path for EC TAB-separated-values (.tsv) formatted results')
parser.add_option('-G', '--go_tsv', dest='go_tsv', default=None,
                  help='Output file path for GO TAB-separated-values (.tsv) formatted results')
parser.add_option('-L', '--lineage_tsv', dest='lineage_tsv', default=None,
                  help='Output file path for Lineage TAB-separated-values (.tsv) formatted results')
parser.add_option('-T', '--tsv', dest='tsv', default=None,
                  help='Output file path for TAB-separated-values (.tsv) formatted results')
parser.add_option('-C', '--csv', dest='csv', default=None,
                  help='Output file path for Comma-separated-values (.csv) formatted results')
parser.add_option('-U', '--unmatched', dest='unmatched', default=None,
                  help='Output file path for peptide with no matches')
parser.add_option('-u', '--url', dest='url', default='http://api.unipept.ugent.be/api/v1/',
                  help='unipept url http://api.unipept.ugent.be/api/v1/')
# debug
parser.add_option('-g', '--get', dest='get', action='store_true', default=False,
                  help='Use GET instead of POST')
parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False,
                  help='Turning on debugging')
parser.add_option('-v', '--version', dest='version', action='store_true', default=False,
                  help='print version and exit')
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
449 (options, args) = parser.parse_args() |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
450 if options.version: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
451 print >> sys.stdout,"%s" % version |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
452 sys.exit(0) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
453 invalid_ec = 2 if options.strict else None |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
454 peptides = [] |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
455 ## Get peptide sequences |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
456 if options.mzid: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
457 peptides += read_mzid(options.mzid) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
458 if options.pepxml: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
459 peptides += read_pepxml(options.pepxml) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
460 if options.tabular: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
461 peptides += read_tabular(options.tabular,options.column) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
462 if options.fasta: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
463 peptides += read_fasta(options.fasta) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
464 if args and len(args) > 0: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
465 for i,peptide in enumerate(args): |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
466 if not re.match(pep_pat,peptide): |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
467 warn_err('"%s" is not a peptide (arg %d)\n' % (peptide,i),exit_code=invalid_ec) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
468 peptides.append(peptide) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
469 if len(peptides) < 1: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
470 warn_err("No peptides input!",exit_code=1) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
471 column_order = pept2lca_column_order |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
472 if options.unipept == 'pept2prot': |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
473 column_order = pept2prot_extra_column_order if options.extra else pept2prot_column_order |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
474 else: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
475 if options.extra or options.names: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
476 column_order = pept2lca_all_column_order if options.allfields else pept2lca_extra_column_order |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
477 else: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
478 column_order = pept2lca_column_order |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
479 ## map to tryptic peptides |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
480 pepToParts = {p: re.split("\n", re.sub(r'(?<=[RK])(?=[^P])','\n', p)) for p in peptides} |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
481 partToPeps = {} |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
482 for peptide, parts in pepToParts.iteritems(): |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
483 if options.debug: print >> sys.stdout, "peptide: %s\ttryptic: %s\n" % (peptide, parts) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
484 for part in parts: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
485 if len(part) > 50: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
486 warn_err("peptide: %s tryptic fragment len %d > 50 for %s\n" % (peptide,len(part),part),exit_code=None) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
487 if 5 <= len(part) <= 50: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
488 partToPeps.setdefault(part,[]).append(peptide) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
489 trypticPeptides = partToPeps.keys() |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
490 ## unipept |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
491 unipept_resp = [] |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
492 idx = range(0,len(trypticPeptides),options.max_request) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
493 idx.append(len(trypticPeptides)) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
494 for i in range(len(idx)-1): |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
495 post_data = [] |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
496 if options.equate_il: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
497 post_data.append(("equate_il","true")) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
498 if options.names or options.json: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
499 post_data.append(("extra","true")) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
500 post_data.append(("names","true")) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
501 elif options.extra or options.json: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
502 post_data.append(("extra","true")) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
503 if options.domains: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
504 post_data.append(("domains","true")) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
505 post_data += [('input[]', x) for x in trypticPeptides[idx[i]:idx[i+1]]] |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
506 if options.debug: print >> sys.stdout, "post_data: %s\n" % (str(post_data)) |
1
b65ee881ca64
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95
galaxyp
parents:
0
diff
changeset
|
507 headers = {'Content-Type': 'application/x-www-form-urlencoded', 'Accept': 'application/json'} |
b65ee881ca64
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95
galaxyp
parents:
0
diff
changeset
|
508 ## headers = {'Accept': 'application/json'} |
0
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
509 url = '%s/%s' % (options.url.rstrip('/'),options.unipept) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
510 if options.get: |
1
b65ee881ca64
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95
galaxyp
parents:
0
diff
changeset
|
511 params = '&'.join(['%s=%s' % (i[0],i[1]) for i in post_data]) |
0
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
512 url = '%s.json?%s' % (url,params) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
513 req = urllib2.Request( url ) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
514 else: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
515 url = '%s.json' % (url) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
516 req = urllib2.Request( url, headers = headers, data = urllib.urlencode(post_data) ) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
517 if options.debug: print >> sys.stdout, "url: %s\n" % (str(url)) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
518 try: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
519 resp = urllib2.urlopen( req ) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
520 if options.debug: print >> sys.stdout,"%s %s\n" % (url,str(resp.getcode())) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
521 if resp.getcode() == 200: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
522 unipept_resp += json.loads( urllib2.urlopen( req ).read() ) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
523 except Exception, e: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
524 warn_err('HTTP Error %s\n' % (str(e)),exit_code=None) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
525 unmatched_peptides = [] |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
526 peptideMatches = [] |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
527 if options.debug: print >> sys.stdout,"unipept response: %s\n" % str(unipept_resp) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
528 if options.unipept in ['pept2prot', 'pept2taxa']: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
529 dupkey = 'uniprot_id' if options.unipept == 'pept2prot' else 'taxon_id' ## should only keep one of these per input peptide |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
530 ## multiple entries per trypticPeptide for pept2prot or pept2taxa |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
531 mapping = {} |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
532 for match in unipept_resp: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
533 mapping.setdefault(match['peptide'],[]).append(match) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
534 for peptide in peptides: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
535 # Get the intersection of matches to the tryptic parts |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
536 keyToMatch = None |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
537 for part in pepToParts[peptide]: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
538 if part in mapping: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
539 temp = {match[dupkey] : match for match in mapping[part]} |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
540 if keyToMatch: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
541 dkeys = set(keyToMatch.keys()) - set(temp.keys()) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
542 for k in dkeys: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
543 del keyToMatch[k] |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
544 else: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
545 keyToMatch = temp |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
546 ## keyToMatch = keyToMatch.fromkeys([x for x in keyToMatch if x in temp]) if keyToMatch else temp |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
547 if not keyToMatch: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
548 unmatched_peptides.append(peptide) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
549 else: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
550 for key,match in keyToMatch.iteritems(): |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
551 match['tryptic_peptide'] = match['peptide'] |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
552 match['peptide'] = peptide |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
553 peptideMatches.append(match) |
1
b65ee881ca64
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95
galaxyp
parents:
0
diff
changeset
|
554 elif options.unipept in ['pept2lca', 'peptinfo']: |
0
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
555 ## should be one response per trypticPeptide for pept2lca |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
556 respMap = {v['peptide']:v for v in unipept_resp} |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
557 ## map resp back to peptides |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
558 for peptide in peptides: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
559 matches = list() |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
560 for part in pepToParts[peptide]: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
561 if part in respMap: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
562 matches.append(respMap[part]) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
563 match = best_match(peptide,matches) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
564 if not match: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
565 unmatched_peptides.append(peptide) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
566 longest_tryptic_peptide = sorted(pepToParts[peptide], key=lambda x: len(x))[-1] |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
567 match = {'peptide' : longest_tryptic_peptide} |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
568 match['tryptic_peptide'] = match['peptide'] |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
569 match['peptide'] = peptide |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
570 peptideMatches.append(match) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
571 else: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
572 respMap = {v['peptide']:v for v in unipept_resp} |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
573 ## map resp back to peptides |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
574 for peptide in peptides: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
575 matches = list() |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
576 for part in pepToParts[peptide]: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
577 if part in respMap and 'total_protein_count' in respMap[part]: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
578 matches.append(respMap[part]) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
579 match = best_match(peptide,matches) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
580 if not match: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
581 unmatched_peptides.append(peptide) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
582 longest_tryptic_peptide = sorted(pepToParts[peptide], key=lambda x: len(x))[-1] |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
583 match = {'peptide' : longest_tryptic_peptide} |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
584 match['tryptic_peptide'] = match['peptide'] |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
585 match['peptide'] = peptide |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
586 peptideMatches.append(match) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
587 resp = peptideMatches |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
588 if options.debug: print >> sys.stdout,"\nmapped response: %s\n" % str(resp) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
589 ## output results |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
590 if not (options.unmatched or options.json or options.tsv or options.csv): |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
591 print >> sys.stdout, str(resp) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
592 if options.unmatched: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
593 with open(options.unmatched,'w') as outputFile: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
594 for peptide in peptides: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
595 if peptide in unmatched_peptides: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
596 outputFile.write("%s\n" % peptide) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
597 if options.json: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
598 if options.unipept in ['pept2lca', 'pept2taxa', 'peptinfo']: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
599 root = get_taxon_json(resp) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
600 with open(options.json,'w') as outputFile: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
601 outputFile.write(json.dumps(root)) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
602 elif options.unipept in ['pept2prot', 'pept2ec', 'pept2go', 'pept2funct']: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
603 with open(options.json,'w') as outputFile: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
604 outputFile.write(str(resp)) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
605 if options.ec_json: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
606 if options.unipept in ['pept2ec', 'pept2funct', 'peptinfo']: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
607 root = get_ec_json(resp) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
608 with open(options.ec_json,'w') as outputFile: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
609 outputFile.write(json.dumps(root)) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
610 if options.tsv or options.csv: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
611 rows = [] |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
612 column_names = None |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
613 if options.unipept in ['pept2ec', 'pept2go', 'pept2funct', 'peptinfo']: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
614 taxa = None |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
615 ec_dict = None |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
616 go_dict = None |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
617 if options.unipept in ['peptinfo']: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
618 (taxa,taxon_cols) = get_taxon_dict(resp, column_order, extra=options.extra, names=options.names) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
619 if options.unipept in ['pept2ec', 'pept2funct', 'peptinfo']: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
620 (ec_dict,ec_cols) = get_ec_dict(resp, extra=options.extra) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
621 if options.unipept in ['pept2go', 'pept2funct', 'peptinfo']: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
622 (go_dict,go_cols) = get_go_dict(resp, extra=options.extra) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
623 for i,pdict in enumerate(resp): |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
624 peptide = pdict['peptide'] |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
625 total_protein_count = str(pdict['total_protein_count']) if 'total_protein_count' in pdict else '0' |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
626 column_names = ['peptide', 'total_protein_count'] |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
627 vals = [peptide,total_protein_count] |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
628 if ec_dict: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
629 vals += ec_dict[peptide] |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
630 column_names += ec_cols |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
631 if go_dict: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
632 vals += go_dict[peptide] |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
633 column_names += go_cols |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
634 if taxa: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
635 vals += taxa[peptide][1:] |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
636 column_names += taxon_cols[1:] |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
637 rows.append(vals) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
638 elif options.unipept in ['pept2lca', 'pept2taxa', 'pept2prot']: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
639 (taxa,taxon_cols) = get_taxon_dict(resp, column_order, extra=options.extra, names=options.names) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
640 column_names = taxon_cols |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
641 rows = taxa.values() |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
642 for peptide,vals in taxa.iteritems(): |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
643 rows.append(vals) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
644 if options.tsv: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
645 with open(options.tsv,'w') as outputFile: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
646 if column_names: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
647 outputFile.write("#%s\n"% '\t'.join(column_names)) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
648 for vals in rows: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
649 outputFile.write("%s\n"% '\t'.join(vals)) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
650 if options.csv: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
651 with open(options.csv,'w') as outputFile: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
652 if column_names: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
653 outputFile.write("%s\n"% ','.join(column_names)) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
654 for vals in rows: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
655 outputFile.write("%s\n"% ','.join(['"%s"' % (v if v else '') for v in vals])) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
656 if options.ec_tsv and options.unipept in ['pept2ec', 'pept2funct', 'peptinfo']: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
657 column_order = pept2ec_extra_column_order if options.extra else pept2ec_column_order |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
658 write_ec_table(options.ec_tsv, resp, column_order) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
659 if options.go_tsv and options.unipept in ['pept2go', 'pept2funct', 'peptinfo']: |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
660 column_order = pept2go_extra_column_order if options.extra else pept2go_column_order |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
661 write_go_table(options.go_tsv, resp, column_order) |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
662 |
b33376bf2290
planemo upload for repository http://unipept.ugent.be/apidocs commit 704a0414b2547298b2596219998002491505d927-dirty
galaxyp
parents:
diff
changeset
|
# Call __main__() only when this module is executed as a script
# (i.e. its __name__ is "__main__"), not when it is imported.
if __name__ == "__main__":
    __main__()