annotate mqppep_mrgfltr.py @ 5:d4d531006735 draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
author eschen42
date Thu, 10 Mar 2022 23:42:48 +0000
parents c1403d18c189
children 922d309640db
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1 #!/usr/bin/env python
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
2
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
3 # Import the packages needed
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
4 import argparse
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
5 import operator # for operator.itemgetter
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
6 import os.path
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
7 import re
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
8 import shutil # for shutil.copyfile(src, dest)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
9 import sqlite3 as sql
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
10 import sys # import the sys module for exc_info
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
11 import time
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
12 import traceback # for formatting stack-trace
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
13 from codecs import getreader as cx_getreader
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
14
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
15 import numpy as np
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
16 import pandas
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
17
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
18 # global constants
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
19 N_A = "N/A"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
20
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
21
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
22 # ref: https://stackoverflow.com/a/8915613/15509512
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
23 # answers: "How to handle exceptions in a list comprehensions"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
24 # usage:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
25 # from math import log
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
26 # eggs = [1,3,0,3,2]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
27 # print([x for x in [catch(log, egg) for egg in eggs] if x is not None])
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
28 # producing:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
29 # for <built-in function log>
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
30 # with args (0,)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
31 # exception: math domain error
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
32 # [0.0, 1.0986122886681098, 1.0986122886681098, 0.6931471805599453]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
def catch(func, *args, handle=lambda e: e, **kwargs):
    """Call ``func(*args, **kwargs)`` and abort the program if it raises.

    On success the return value of ``func`` is passed straight through.
    If ``func`` raises any ``Exception``, a diagnostic (the callable, its
    positional arguments, the exception message and the formatted stack
    trace) is printed to standard output and the process is terminated
    with exit status -1.

    Args:
        func: The callable to invoke.
        *args: Positional arguments forwarded to ``func``.
        handle: Accepted for backward compatibility only; this function
            never calls it.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func(*args, **kwargs)`` returns.

    Raises:
        SystemExit: With code -1 when ``func`` raises an ``Exception``.
    """
    try:
        return func(*args, **kwargs)
    except Exception as e:
        print("For %s" % str(func))
        print(" with args %s" % str(args))
        print(" caught exception: %s" % str(e))
        # format_exc() yields one readable multi-line string for the
        # exception being handled, rather than the repr of a list of lines.
        print(" stack trace: " + traceback.format_exc())
        # sys.exit is always available; the bare exit() builtin is only
        # injected by the site module and is meant for interactive use.
        sys.exit(-1)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
45
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
46
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
def ppep_join(x):
    """Join the entries of *x* with " | ", ignoring N/A placeholders.

    Entries equal to the module constant ``N_A`` are dropped before
    joining. If the joined text is empty (for example because every
    entry was ``N_A``, or *x* was empty), ``N_A`` is returned instead.

    Args:
        x: An iterable of strings.

    Returns:
        The remaining entries separated by " | ", or ``N_A`` when the
        joined result would be the empty string.
    """
    kept = [item for item in x if item != N_A]
    # An empty join result is falsy, so `or` supplies the placeholder.
    return " | ".join(kept) or N_A
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
54
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
55
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
def melt_join(x):
    """Join the strings in *x* with " | ", de-duplicating case-insensitively.

    Entries that differ only by letter case are collapsed into one. The
    surviving entry keeps the position of the first occurrence but the
    spelling of the last occurrence, because later items overwrite the
    value stored under the same lower-cased key.

    Args:
        x: An iterable of strings.

    Returns:
        The de-duplicated entries separated by " | "; the empty string
        when *x* is empty.
    """
    # Keyed by lower-cased text; dict insertion order fixes the output order.
    spelling_by_folded = {item.lower(): item for item in x}
    return " | ".join(spelling_by_folded.values())
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
60
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
61
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
62 def __main__():
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
63 # Parse Command Line
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
64 parser = argparse.ArgumentParser(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
65 description="Phopsphoproteomic Enrichment Pipeline Merge and Filter."
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
66 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
67
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
68 # inputs:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
69 # Phosphopeptide data for experimental results, including the intensities
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
70 # and the mapping to kinase domains, in tabular format.
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
71 parser.add_argument(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
72 "--phosphopeptides",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
73 "-p",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
74 nargs=1,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
75 required=True,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
76 dest="phosphopeptides",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
77 help="Phosphopeptide data for experimental results, including the intensities and the mapping to kinase domains, in tabular format",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
78 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
79 # UniProtKB/SwissProt DB input, SQLite
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
80 parser.add_argument(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
81 "--ppep_mapping_db",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
82 "-d",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
83 nargs=1,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
84 required=True,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
85 dest="ppep_mapping_db",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
86 help="UniProtKB/SwissProt SQLite Database",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
87 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
88 # species to limit records chosen from PhosphoSitePlus
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
89 parser.add_argument(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
90 "--species",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
91 "-x",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
92 nargs=1,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
93 required=False,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
94 default=[],
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
95 dest="species",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
96 help="limit PhosphoSitePlus records to indicated species (field may be empty)",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
97 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
98
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
99 # outputs:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
100 # tabular output
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
101 parser.add_argument(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
102 "--mrgfltr_tab",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
103 "-o",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
104 nargs=1,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
105 required=True,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
106 dest="mrgfltr_tab",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
107 help="Tabular output file for results",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
108 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
109 # CSV output
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
110 parser.add_argument(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
111 "--mrgfltr_csv",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
112 "-c",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
113 nargs=1,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
114 required=True,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
115 dest="mrgfltr_csv",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
116 help="CSV output file for results",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
117 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
118 # SQLite output
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
119 parser.add_argument(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
120 "--mrgfltr_sqlite",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
121 "-S",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
122 nargs=1,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
123 required=True,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
124 dest="mrgfltr_sqlite",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
125 help="SQLite output file for results",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
126 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
127
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
128 # "Make it so!" (parse the arguments)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
129 options = parser.parse_args()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
130 print("options: " + str(options))
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
131
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
132 # determine phosphopeptide ("upstream map") input tabular file access
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
133 if options.phosphopeptides is None:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
134 exit('Argument "phosphopeptides" is required but not supplied')
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
135 try:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
136 upstream_map_filename_tab = os.path.abspath(options.phosphopeptides[0])
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
137 input_file = open(upstream_map_filename_tab, "r")
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
138 input_file.close()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
139 except Exception as e:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
140 exit("Error parsing phosphopeptides argument: %s" % str(e))
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
141
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
142 # determine input SQLite access
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
143 if options.ppep_mapping_db is None:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
144 exit('Argument "ppep_mapping_db" is required but not supplied')
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
145 try:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
146 uniprot_sqlite = os.path.abspath(options.ppep_mapping_db[0])
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
147 input_file = open(uniprot_sqlite, "rb")
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
148 input_file.close()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
149 except Exception as e:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
150 exit("Error parsing ppep_mapping_db argument: %s" % str(e))
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
151
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
152 # copy input SQLite dataset to output SQLite dataset
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
153 if options.mrgfltr_sqlite is None:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
154 exit('Argument "mrgfltr_sqlite" is required but not supplied')
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
155 try:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
156 output_sqlite = os.path.abspath(options.mrgfltr_sqlite[0])
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
157 shutil.copyfile(uniprot_sqlite, output_sqlite)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
158 except Exception as e:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
159 exit("Error copying ppep_mapping_db to mrgfltr_sqlite: %s" % str(e))
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
160
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
161 # determine species to limit records from PSP_Regulatory_Sites
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
162 if options.species is None:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
163 exit(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
164 'Argument "species" is required (and may be empty) but not supplied'
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
165 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
166 try:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
167 if len(options.species) > 0:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
168 species = options.species[0]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
169 else:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
170 species = ""
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
171 except Exception as e:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
172 exit("Error parsing species argument: %s" % str(e))
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
173
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
174 # determine tabular output destination
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
175 if options.mrgfltr_tab is None:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
176 exit('Argument "mrgfltr_tab" is required but not supplied')
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
177 try:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
178 output_filename_tab = os.path.abspath(options.mrgfltr_tab[0])
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
179 output_file = open(output_filename_tab, "w")
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
180 output_file.close()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
181 except Exception as e:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
182 exit("Error parsing mrgfltr_tab argument: %s" % str(e))
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
183
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
184 # determine CSV output destination
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
185 if options.mrgfltr_csv is None:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
186 exit('Argument "mrgfltr_csv" is required but not supplied')
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
187 try:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
188 output_filename_csv = os.path.abspath(options.mrgfltr_csv[0])
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
189 output_file = open(output_filename_csv, "w")
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
190 output_file.close()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
191 except Exception as e:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
192 exit("Error parsing mrgfltr_csv argument: %s" % str(e))
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
193
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
194 def mqpep_getswissprot():
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
195
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
196 #
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
197 # copied from Excel Output Script.ipynb BEGIN #
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
198 #
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
199
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
200 # String Constants #################
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
201 DEPHOSPHOPEP = "DephosphoPep"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
202 DESCRIPTION = "Description"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
203 FUNCTION_PHOSPHORESIDUE = (
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
204 "Function Phosphoresidue(PSP=PhosphoSitePlus.org)"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
205 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
206 GENE_NAME = "Gene_Name" # Gene Name from UniProtKB
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
207 ON_FUNCTION = (
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
208 "ON_FUNCTION" # ON_FUNCTION column from PSP_Regulatory_Sites
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
209 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
210 ON_NOTES = "NOTES" # NOTES column from PSP_Regulatory_Sites
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
211 ON_OTHER_INTERACT = "ON_OTHER_INTERACT" # ON_OTHER_INTERACT column from PSP_Regulatory_Sites
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
212 ON_PROCESS = (
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
213 "ON_PROCESS" # ON_PROCESS column from PSP_Regulatory_Sites
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
214 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
215 ON_PROT_INTERACT = "ON_PROT_INTERACT" # ON_PROT_INTERACT column from PSP_Regulatory_Sites
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
216 PHOSPHOPEPTIDE = "Phosphopeptide"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
217 PHOSPHOPEPTIDE_MATCH = "Phosphopeptide_match"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
218 PHOSPHORESIDUE = "Phosphoresidue"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
219 PUTATIVE_UPSTREAM_DOMAINS = "Putative Upstream Kinases(PSP=PhosphoSitePlus.org)/Phosphatases/Binding Domains"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
220 SEQUENCE = "Sequence"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
221 SEQUENCE10 = "Sequence10"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
222 SEQUENCE7 = "Sequence7"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
223 SITE_PLUSMINUS_7AA_SQL = "SITE_PLUSMINUS_7AA"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
224 UNIPROT_ID = "UniProt_ID"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
225 UNIPROT_SEQ_AND_META_SQL = """
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
226 select Uniprot_ID, Description, Gene_Name, Sequence,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
227 Organism_Name, Organism_ID, PE, SV
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
228 from UniProtKB
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
229 order by Sequence, UniProt_ID
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
230 """
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
231 UNIPROT_UNIQUE_SEQ_SQL = """
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
232 select distinct Sequence
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
233 from UniProtKB
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
234 group by Sequence
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
235 """
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
236 PPEP_PEP_UNIPROTSEQ_SQL = """
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
237 select distinct phosphopeptide, peptide, sequence
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
238 from uniprotkb_pep_ppep_view
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
239 order by sequence
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
240 """
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
241 PPEP_MELT_SQL = """
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
242 SELECT DISTINCT
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
243 phospho_peptide AS 'p_peptide',
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
244 kinase_map AS 'characterization',
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
245 'X' AS 'X'
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
246 FROM ppep_gene_site_view
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
247 """
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
248 # CREATE TABLE PSP_Regulatory_site (
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
249 # site_plusminus_7AA TEXT PRIMARY KEY ON CONFLICT IGNORE,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
250 # domain TEXT,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
251 # ON_FUNCTION TEXT,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
252 # ON_PROCESS TEXT,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
253 # ON_PROT_INTERACT TEXT,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
254 # ON_OTHER_INTERACT TEXT,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
255 # notes TEXT,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
256 # organism TEXT
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
257 # );
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
258 PSP_REGSITE_SQL = """
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
259 SELECT DISTINCT
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
260 SITE_PLUSMINUS_7AA ,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
261 DOMAIN ,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
262 ON_FUNCTION ,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
263 ON_PROCESS ,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
264 ON_PROT_INTERACT ,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
265 ON_OTHER_INTERACT ,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
266 NOTES ,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
267 ORGANISM
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
268 FROM PSP_Regulatory_site
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
269 """
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
# Query returning every row of the `ppep` table as (ppep_id, ppep_seq).
PPEP_ID_SQL = """
SELECT
    id AS 'ppep_id',
    seq AS 'ppep_seq'
FROM ppep
"""
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
# Multi-statement DDL that (re)creates the merge/filter metadata objects:
#   - mrgfltr_metadata: annotation columns keyed by ppep_id; the primary key
#     is declared ON CONFLICT IGNORE, so a second insert for an existing
#     ppep_id is dropped instead of raising.
#   - mrgfltr_metadata_view: the same annotation columns, joined to `ppep`
#     so that rows are identified by the phosphopeptide sequence
#     (phospho_peptide) rather than by the numeric id.
# Both objects are dropped first, so the script can be re-run.
MRGFLTR_DDL = """
DROP VIEW IF EXISTS mrgfltr_metadata_view;
DROP TABLE IF EXISTS mrgfltr_metadata;
CREATE TABLE mrgfltr_metadata
  ( ppep_id                 INTEGER REFERENCES ppep(id)
  , Sequence10              TEXT
  , Sequence7               TEXT
  , GeneName                TEXT
  , Phosphoresidue          TEXT
  , UniProtID               TEXT
  , Description             TEXT
  , FunctionPhosphoresidue  TEXT
  , PutativeUpstreamDomains TEXT
  , PRIMARY KEY (ppep_id) ON CONFLICT IGNORE
  )
  ;
CREATE VIEW mrgfltr_metadata_view AS
  SELECT DISTINCT
      ppep.seq AS phospho_peptide
    , Sequence10
    , Sequence7
    , GeneName
    , Phosphoresidue
    , UniProtID
    , Description
    , FunctionPhosphoresidue
    , PutativeUpstreamDomains
  FROM
    ppep, mrgfltr_metadata
  WHERE
    mrgfltr_metadata.ppep_id = ppep.id
  ORDER BY
    ppep.seq
  ;
"""
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
311
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
# Parameterized INSERT adding one (ObjectName, CitationData) row to the
# Citation table; the two `?` placeholders are bound in that order.
CITATION_INSERT_STMT = """
INSERT INTO Citation (
    ObjectName,
    CitationData
) VALUES (?,?)
"""
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
# Attribution/licence text for PhosphoSitePlus data. The wording is kept
# verbatim on one line because it is stored as data, not formatted here.
CITATION_INSERT_PSP = 'PhosphoSitePlus(R) (PSP) was created by Cell Signaling Technology Inc. It is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License. When using PSP data or analyses in printed publications or in online resources, the following acknowledgements must be included: (a) the words "PhosphoSitePlus(R), www.phosphosite.org" must be included at appropriate places in the text or webpage, and (b) the following citation must be included in the bibliography: "Hornbeck PV, Zhang B, Murray B, Kornhauser JM, Latham V, Skrzypek E PhosphoSitePlus, 2014: mutations, PTMs and recalibrations. Nucleic Acids Res. 2015 43:D512-20. PMID: 25514926."'
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
# Short-form reference for the paper "PhosphoSitePlus, 2014: mutations, PTMs
# and recalibrations." The PubMed and DOI links identify that paper
# (PMID 25514926); the previous links belonged to a different
# PhosphoSitePlus publication and did not match the quoted title.
CITATION_INSERT_PSP_REF = 'Hornbeck, 2014, "PhosphoSitePlus, 2014: mutations, PTMs and recalibrations.", https://pubmed.ncbi.nlm.nih.gov/25514926, https://doi.org/10.1093/nar/gku1267'
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
320
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
# Column names of the mrgfltr_metadata table, listed in the same order as
# the columns are declared in its CREATE TABLE statement.
MRGFLTR_METADATA_COLUMNS = [
    "ppep_id",
    "Sequence10",
    "Sequence7",
    "GeneName",
    "Phosphoresidue",
    "UniProtID",
    "Description",
    "FunctionPhosphoresidue",
    "PutativeUpstreamDomains",
]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
332
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
333 # String Constants (end) ############
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
334
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
class Error(Exception):
    """Base class for exceptions in this module."""
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
339
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
class PreconditionError(Error):
    """Exception raised for errors in the input.

    Attributes:
        expression -- input expression in which the error occurred
        message -- explanation of the error
    """

    def __init__(self, expression, message):
        # Initialise the base class explicitly; ``args`` is still
        # (expression, message), so str()/repr() output is unchanged.
        super().__init__(expression, message)
        self.expression = expression
        self.message = message
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
351
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
# CPU-time stamp for the progress line printed at the end of this section.
# time.process_time() is used in place of the older time.clock(), which was
# removed from the standard library in Python 3.8.
start_time = time.process_time()  # timer

# Get keys from the upstream tabular file using readline(): collect the
# first tab-delimited field of every line after the header line.
# ref: https://stackoverflow.com/a/16713581/15509512
#      answer to "Use codecs to read file with correct encoding"
# NOTE(review): cx_getreader is defined elsewhere in this file; it is
# presumably codecs.getreader - confirm against the import block.
count = 0
upstream_map_p_peptide_list = []
re_tab = re.compile("^[^\t]*")
# The `with` block guarantees the file is closed even if decoding or
# matching raises part-way through the loop.
with open(upstream_map_filename_tab, "rb") as file1_encoded:
    file1 = cx_getreader("latin-1")(file1_encoded)
    while True:
        count += 1
        # Get next line from file
        line = file1.readline()
        # An empty string (not even a newline) means end of file
        if not line:
            break
        # count == 1 is the header line, which is skipped
        if count > 1:
            # The pattern can match the empty string, so `m` is never None
            m = re_tab.match(line)
            upstream_map_p_peptide_list.append(m[0])

# Get the list of phosphopeptides with the p's that represent the
# phosphorylation sites removed
re_phos = re.compile("p")

end_time = time.process_time()  # timer
print(
    "%0.6f pre-read-SwissProt [0.1]" % (end_time - start_time,),
    file=sys.stderr,
)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
386
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
# ----------- Get SwissProt data from SQLite database (start) -----------
# build UniProt sequence LUT and list of unique SwissProt sequences

# Open SwissProt SQLite database
conn = sql.connect(uniprot_sqlite)
cur = conn.cursor()

# Set up structures to hold SwissProt data
uniprot_Sequence_List = []
UniProtSeqLUT = {}

# Execute query for unique seqs without fetching the results yet
uniprot_unique_seq_cur = cur.execute(UNIPROT_UNIQUE_SEQ_SQL)

# Seed the LUT with an empty list for every unique sequence, both under the
# bare sequence and under each (sequence, field) key. Rows are pulled in
# batches of 50 so the whole result set is never held in memory at once.
while True:
    batch = uniprot_unique_seq_cur.fetchmany(size=50)
    if not batch:
        # an empty batch means the result set is exhausted (this also
        # covers the case where no records are returned at all)
        break
    for row in batch:
        Sequence = row[0]
        UniProtSeqLUT[(Sequence, DESCRIPTION)] = []
        UniProtSeqLUT[(Sequence, GENE_NAME)] = []
        UniProtSeqLUT[(Sequence, UNIPROT_ID)] = []
        UniProtSeqLUT[Sequence] = []
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
413
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
414 # Execute query for seqs and metadata without fetching the results yet
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
415 uniprot_seq_and_meta = cur.execute(UNIPROT_SEQ_AND_META_SQL)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
416
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
417 while 1:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
418 batch = uniprot_seq_and_meta.fetchmany(size=50)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
419 if not batch:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
420 # handle case where no records are returned
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
421 break
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
422 for (
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
423 UniProt_ID,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
424 Description,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
425 Gene_Name,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
426 Sequence,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
427 OS,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
428 OX,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
429 PE,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
430 SV,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
431 ) in batch:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
432 uniprot_Sequence_List.append(Sequence)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
433 UniProtSeqLUT[Sequence] = Sequence
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
434 UniProtSeqLUT[(Sequence, UNIPROT_ID)].append(UniProt_ID)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
435 UniProtSeqLUT[(Sequence, GENE_NAME)].append(Gene_Name)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
436 if OS != N_A:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
437 Description += " OS=" + OS
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
438 if OX != N_A:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
439 Description += " OX=" + str(int(OX))
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
440 if Gene_Name != N_A:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
441 Description += " GN=" + Gene_Name
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
442 if PE != N_A:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
443 Description += " PE=" + PE
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
444 if SV != N_A:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
445 Description += " SV=" + SV
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
446 UniProtSeqLUT[(Sequence, DESCRIPTION)].append(Description)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
447
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
448 # Close SwissProt SQLite database; clean up local variables
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
449 conn.close()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
450 Sequence = ""
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
451 UniProt_ID = ""
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
452 Description = ""
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
453 Gene_Name = ""
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
454
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
455 # ----------- Get SwissProt data from SQLite database (finish) -----------
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
456
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
457 end_time = time.process_time() # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
458 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
459 "%0.6f post-read-SwissProt [0.2]" % (end_time - start_time,),
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
460 file=sys.stderr,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
461 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
462
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
463 # ----------- Get SwissProt data from SQLite database (start) -----------
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
464 # Open SwissProt SQLite database
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
465 conn = sql.connect(uniprot_sqlite)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
466 cur = conn.cursor()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
467
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
468 # Set up dictionary to aggregate results for phosphopeptides corresponding to dephosphopeptide
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
469 DephosphoPep_UniProtSeq_LUT = {}
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
470
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
471 # Set up dictionary to accumulate results
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
472 PhosphoPep_UniProtSeq_LUT = {}
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
473
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
474 # Execute query for tuples without fetching the results yet
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
475 ppep_pep_uniprotseq_cur = cur.execute(PPEP_PEP_UNIPROTSEQ_SQL)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
476
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
477 while 1:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
478 batch = ppep_pep_uniprotseq_cur.fetchmany(size=50)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
479 if not batch:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
480 # handle case where no records are returned
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
481 break
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
482 for (phospho_pep, dephospho_pep, sequence) in batch:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
483 # Index this (phospho_pep, dephospho_pep, sequence) tuple in the phospho- and dephospho-peptide lookup tables
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
484 PhosphoPep_UniProtSeq_LUT[phospho_pep] = phospho_pep
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
485 PhosphoPep_UniProtSeq_LUT[
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
486 (phospho_pep, DEPHOSPHOPEP)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
487 ] = dephospho_pep
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
488 if dephospho_pep not in DephosphoPep_UniProtSeq_LUT:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
489 DephosphoPep_UniProtSeq_LUT[dephospho_pep] = set()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
490 DephosphoPep_UniProtSeq_LUT[
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
491 (dephospho_pep, DESCRIPTION)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
492 ] = []
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
493 DephosphoPep_UniProtSeq_LUT[
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
494 (dephospho_pep, GENE_NAME)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
495 ] = []
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
496 DephosphoPep_UniProtSeq_LUT[
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
497 (dephospho_pep, UNIPROT_ID)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
498 ] = []
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
499 DephosphoPep_UniProtSeq_LUT[(dephospho_pep, SEQUENCE)] = []
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
500 DephosphoPep_UniProtSeq_LUT[dephospho_pep].add(phospho_pep)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
501
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
502 if (
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
503 sequence
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
504 not in DephosphoPep_UniProtSeq_LUT[
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
505 (dephospho_pep, SEQUENCE)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
506 ]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
507 ):
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
508 DephosphoPep_UniProtSeq_LUT[
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
509 (dephospho_pep, SEQUENCE)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
510 ].append(sequence)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
511 for phospho_pep in DephosphoPep_UniProtSeq_LUT[dephospho_pep]:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
512 if phospho_pep != phospho_pep:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
513 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
514 "phospho_pep:'%s' phospho_pep:'%s'"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
515 % (phospho_pep, phospho_pep)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
516 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
517 if phospho_pep not in PhosphoPep_UniProtSeq_LUT:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
518 PhosphoPep_UniProtSeq_LUT[phospho_pep] = phospho_pep
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
519 PhosphoPep_UniProtSeq_LUT[
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
520 (phospho_pep, DEPHOSPHOPEP)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
521 ] = dephospho_pep
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
522 r = list(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
523 zip(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
524 [s for s in UniProtSeqLUT[(sequence, UNIPROT_ID)]],
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
525 [s for s in UniProtSeqLUT[(sequence, GENE_NAME)]],
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
526 [
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
527 s
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
528 for s in UniProtSeqLUT[(sequence, DESCRIPTION)]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
529 ],
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
530 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
531 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
532 # Sort by `UniProt_ID`
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
533 # ref: https://stackoverflow.com/a/4174955/15509512
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
534 r = sorted(r, key=operator.itemgetter(0))
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
535 # Get one tuple for each `phospho_pep`
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
536 # in DephosphoPep_UniProtSeq_LUT[dephospho_pep]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
537 for (upid, gn, desc) in r:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
538 # Append pseudo-tuple per UniProt_ID but only when it is not present
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
539 if (
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
540 upid
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
541 not in DephosphoPep_UniProtSeq_LUT[
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
542 (dephospho_pep, UNIPROT_ID)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
543 ]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
544 ):
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
545 DephosphoPep_UniProtSeq_LUT[
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
546 (dephospho_pep, UNIPROT_ID)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
547 ].append(upid)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
548 DephosphoPep_UniProtSeq_LUT[
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
549 (dephospho_pep, DESCRIPTION)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
550 ].append(desc)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
551 DephosphoPep_UniProtSeq_LUT[
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
552 (dephospho_pep, GENE_NAME)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
553 ].append(gn)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
554
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
# The SwissProt SQLite lookups are done: release the database connection
conn.close()
# Wipe the local variables by rebinding each of them to a neutral value
upid = ""
gn = ""
desc = ""
r = ""
phospho_pep = 0
dephospho_pep = 0
sequence = 0
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
560
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
561 # ----------- Get SwissProt data from SQLite database (finish) -----------
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
562
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
# Timer checkpoint: CPU time elapsed since start_time, reported on stderr
end_time = time.process_time()
print(
    "%0.6f finished reading and decoding '%s' [0.4]"
    % (end_time - start_time, upstream_map_filename_tab),
    file=sys.stderr,
)

# Report (on stdout) the number of entries in upstream_map_p_peptide_list,
# right-aligned in a ten-character field
print(
    "{:>10} unique upstream phosphopeptides tested".format(
        str(len(upstream_map_p_peptide_list))
    )
)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
575
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
# Load the Upstream tabular file.  The intensity data are discarded, so
# every column is read as text; the first file column becomes the index.
upstream_data = pandas.read_table(
    upstream_map_filename_tab,
    index_col=0,
    dtype="str",
)

# Timer checkpoint (stderr)
end_time = time.process_time()
print(
    "%0.6f read Upstream Map from file [1g_1]" % (end_time - start_time,),
    file=sys.stderr,
)

# Replace the row labels read from the file with the phosphopeptide list
# (pandas raises ValueError if the lengths differ)
upstream_data.index = upstream_map_p_peptide_list

# Timer checkpoint (stderr)
end_time = time.process_time()
print(
    "%0.6f added index to Upstream Map [1g_2]" % (end_time - start_time,),
    file=sys.stderr,
)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
597
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
# Trim upstream_data to include only the upstream map columns:
# find the positions of the first and the last column whose name starts
# with "Intensity"; both stay at -1 when no column matches.
old_cols = upstream_data.columns.tolist()
intensity_re = re.compile("Intensity.*")
first_intensity = last_intensity = -1
for i, col_name in enumerate(old_cols):
    if intensity_re.match(col_name) is None:
        continue
    if first_intensity == -1:
        first_intensity = i
    last_intensity = i

# Position two columns past the last intensity column
col_PKCalpha = last_intensity + 2

# Keep the first remaining column followed by the contiguous run of
# columns from the first through the last intensity column (inclusive)
intensity_span = old_cols[first_intensity:last_intensity + 1]
data_in_cols = old_cols[:1] + intensity_span if old_cols else [old_cols[0]]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
617
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
# Stop early, with a success status, when the upstream table holds no data.
if upstream_data.empty:
    print("upstream_data is empty")
    # Use sys.exit rather than the bare exit(): the latter is a helper that
    # the `site` module injects for interactive sessions and is not defined
    # when Python runs with -S or from a frozen/embedded interpreter.
    sys.exit(0)

# Independent copy restricted to the selected columns; selecting before
# copying avoids duplicating the columns that are about to be dropped.
data_in = upstream_data[data_in_cols].copy(deep=True)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
623
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
# Convert floating-point integers to int64 integers
# ref: https://stackoverflow.com/a/68497603/15509512
# Every column after the first is parsed from text as float64 and then
# passed, column by column, through np.int64.
numeric_cols = list(data_in.columns[1:])
as_float = data_in[numeric_cols].astype("float64")
data_in[numeric_cols] = as_float.apply(np.int64)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
631
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
# Create another phosphopeptide column that will be used to join later.
# The values are taken from the row index rather than from a column
# position, so the position of the Phosphopeptide column does not matter.
data_in[PHOSPHOPEPTIDE_MATCH] = data_in.index

# Timer checkpoint (stderr)
end_time = time.process_time()
print(
    "%0.6f set data_in[PHOSPHOPEPTIDE_MATCH] [A]" % (end_time - start_time,),
    file=sys.stderr,
)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
643
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
644 # Produce a dictionary of metadata for a single phosphopeptide.
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
645 # This is a replacement of `UniProtInfo_subdict` in the original code.
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
646 def pseq_to_subdict(phospho_pep):
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
647 # Strip "p" from phosphopeptide sequence
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
648 dephospho_pep = re_phos.sub("", phospho_pep)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
649
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
650 # Determine number of phosphoresidues in phosphopeptide
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
651 numps = len(phospho_pep) - len(dephospho_pep)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
652
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
653 # Determine location(s) of phosphoresidue(s) in phosphopeptide
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
654 # (used later for Phosphoresidue, Sequence7, and Sequence10)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
655 ploc = [] # list of p locations
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
656 i = 0
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
657 p = phospho_pep
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
658 while i < numps:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
659 ploc.append(p.find("p"))
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
660 p = p[: p.find("p")] + p[p.find("p") + 1:]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
661 i += 1
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
662
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
663 # Establish nested dictionary
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
664 result = {}
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
665 result[SEQUENCE] = []
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
666 result[UNIPROT_ID] = []
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
667 result[DESCRIPTION] = []
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
668 result[GENE_NAME] = []
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
669 result[PHOSPHORESIDUE] = []
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
670 result[SEQUENCE7] = []
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
671 result[SEQUENCE10] = []
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
672
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
673 # Add stripped sequence to dictionary
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
674 result[SEQUENCE].append(dephospho_pep)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
675
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
676 # Locate phospho_pep in PhosphoPep_UniProtSeq_LUT
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
677 # Caller may elect to:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
678 # try:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
679 # ...
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
680 # except PreconditionError as pe:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
681 # print("'{expression}': {message}".format(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
682 # expression = pe.expression,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
683 # message = pe.message))
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
684 # )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
685 # )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
686 if dephospho_pep not in DephosphoPep_UniProtSeq_LUT:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
687 raise PreconditionError(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
688 dephospho_pep,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
689 "dephosphorylated phosphopeptide not found in DephosphoPep_UniProtSeq_LUT",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
690 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
691 if phospho_pep not in PhosphoPep_UniProtSeq_LUT:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
692 raise PreconditionError(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
693 dephospho_pep,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
694 "no matching phosphopeptide found in PhosphoPep_UniProtSeq_LUT",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
695 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
696 if (
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
697 dephospho_pep
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
698 != PhosphoPep_UniProtSeq_LUT[(phospho_pep, DEPHOSPHOPEP)]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
699 ):
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
700 raise PreconditionError(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
701 dephospho_pep,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
702 "dephosphorylated phosphopeptide does not match "
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
703 + "PhosphoPep_UniProtSeq_LUT[(phospho_pep,DEPHOSPHOPEP)] = "
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
704 + PhosphoPep_UniProtSeq_LUT[(phospho_pep, DEPHOSPHOPEP)],
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
705 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
706 result[SEQUENCE] = [dephospho_pep]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
707 result[UNIPROT_ID] = DephosphoPep_UniProtSeq_LUT[
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
708 (dephospho_pep, UNIPROT_ID)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
709 ]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
710 result[DESCRIPTION] = DephosphoPep_UniProtSeq_LUT[
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
711 (dephospho_pep, DESCRIPTION)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
712 ]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
713 result[GENE_NAME] = DephosphoPep_UniProtSeq_LUT[
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
714 (dephospho_pep, GENE_NAME)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
715 ]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
716 if (dephospho_pep, SEQUENCE) not in DephosphoPep_UniProtSeq_LUT:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
717 raise PreconditionError(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
718 dephospho_pep,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
719 "no matching phosphopeptide found in DephosphoPep_UniProtSeq_LUT",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
720 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
721 UniProtSeqList = DephosphoPep_UniProtSeq_LUT[
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
722 (dephospho_pep, SEQUENCE)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
723 ]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
724 if len(UniProtSeqList) < 1:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
725 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
726 "Skipping DephosphoPep_UniProtSeq_LUT[('%s',SEQUENCE)] because value has zero length"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
727 % dephospho_pep
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
728 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
729 # raise PreconditionError(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
730 # "DephosphoPep_UniProtSeq_LUT[('" + dephospho_pep + ",SEQUENCE)",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
731 # 'value has zero length'
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
732 # )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
733 for UniProtSeq in UniProtSeqList:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
734 i = 0
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
735 phosphoresidues = []
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
736 seq7s_set = set()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
737 seq7s = []
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
738 seq10s_set = set()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
739 seq10s = []
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
740 while i < len(ploc):
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
741 start = UniProtSeq.find(dephospho_pep)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
742 psite = (
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
743 start + ploc[i]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
744 ) # location of phosphoresidue on protein sequence
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
745
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
746 # add Phosphoresidue
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
747 phosphosite = "p" + str(UniProtSeq)[psite] + str(psite + 1)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
748 phosphoresidues.append(phosphosite)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
749
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
750 # Add Sequence7
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
751 if psite < 7: # phospho_pep at N terminus
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
752 seq7 = str(UniProtSeq)[: psite + 8]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
753 if seq7[psite] == "S": # if phosphosresidue is serine
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
754 pres = "s"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
755 elif (
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
756 seq7[psite] == "T"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
757 ): # if phosphosresidue is threonine
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
758 pres = "t"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
759 elif (
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
760 seq7[psite] == "Y"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
761 ): # if phosphoresidue is tyrosine
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
762 pres = "y"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
763 else: # if not pSTY
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
764 pres = "?"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
765 seq7 = (
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
766 seq7[:psite] + pres + seq7[psite + 1: psite + 8]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
767 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
768 while (
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
769 len(seq7) < 15
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
770 ): # add appropriate number of "_" to the front
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
771 seq7 = "_" + seq7
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
772 elif (
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
773 len(UniProtSeq) - psite < 8
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
774 ): # phospho_pep at C terminus
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
775 seq7 = str(UniProtSeq)[psite - 7:]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
776 if seq7[7] == "S":
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
777 pres = "s"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
778 elif seq7[7] == "T":
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
779 pres = "t"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
780 elif seq7[7] == "Y":
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
781 pres = "y"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
782 else:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
783 pres = "?"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
784 seq7 = seq7[:7] + pres + seq7[8:]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
785 while (
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
786 len(seq7) < 15
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
787 ): # add appropriate number of "_" to the back
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
788 seq7 = seq7 + "_"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
789 else:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
790 seq7 = str(UniProtSeq)[psite - 7: psite + 8]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
791 pres = "" # phosphoresidue
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
792 if seq7[7] == "S": # if phosphosresidue is serine
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
793 pres = "s"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
794 elif seq7[7] == "T": # if phosphosresidue is threonine
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
795 pres = "t"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
796 elif seq7[7] == "Y": # if phosphoresidue is tyrosine
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
797 pres = "y"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
798 else: # if not pSTY
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
799 pres = "?"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
800 seq7 = seq7[:7] + pres + seq7[8:]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
801 if seq7 not in seq7s_set:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
802 seq7s.append(seq7)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
803 seq7s_set.add(seq7)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
804
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
805 # add Sequence10
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
806 if psite < 10: # phospho_pep at N terminus
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
807 seq10 = (
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
808 str(UniProtSeq)[:psite]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
809 + "p"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
810 + str(UniProtSeq)[psite: psite + 11]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
811 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
812 elif (
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
813 len(UniProtSeq) - psite < 11
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
814 ): # phospho_pep at C terminus
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
815 seq10 = (
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
816 str(UniProtSeq)[psite - 10: psite]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
817 + "p"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
818 + str(UniProtSeq)[psite:]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
819 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
820 else:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
821 seq10 = str(UniProtSeq)[psite - 10: psite + 11]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
822 seq10 = seq10[:10] + "p" + seq10[10:]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
823 if seq10 not in seq10s_set:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
824 seq10s.append(seq10)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
825 seq10s_set.add(seq10)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
826
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
827 i += 1
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
828
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
829 result[PHOSPHORESIDUE].append(phosphoresidues)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
830 result[SEQUENCE7].append(seq7s)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
831 # result[SEQUENCE10] is a list of lists of strings
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
832 result[SEQUENCE10].append(seq10s)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
833
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
834 r = list(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
835 zip(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
836 result[UNIPROT_ID],
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
837 result[GENE_NAME],
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
838 result[DESCRIPTION],
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
839 result[PHOSPHORESIDUE],
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
840 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
841 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
842 # Sort by `UniProt_ID`
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
843 # ref: https://stackoverflow.com/a/4174955/15509512
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
844 s = sorted(r, key=operator.itemgetter(0))
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
845
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
846 result[UNIPROT_ID] = []
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
847 result[GENE_NAME] = []
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
848 result[DESCRIPTION] = []
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
849 result[PHOSPHORESIDUE] = []
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
850
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
851 for r in s:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
852 result[UNIPROT_ID].append(r[0])
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
853 result[GENE_NAME].append(r[1])
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
854 result[DESCRIPTION].append(r[2])
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
855 result[PHOSPHORESIDUE].append(r[3])
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
856
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
857 # convert lists to strings in the dictionary
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
858 for key, value in result.items():
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
859 if key not in [PHOSPHORESIDUE, SEQUENCE7, SEQUENCE10]:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
860 result[key] = "; ".join(map(str, value))
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
861 elif key in [SEQUENCE10]:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
862 # result[SEQUENCE10] is a list of lists of strings
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
863 joined_value = ""
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
864 joined_set = set()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
865 sep = ""
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
866 for valL in value:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
867 # valL is a list of strings
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
868 for val in valL:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
869 # val is a string
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
870 if val not in joined_set:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
871 joined_set.add(val)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
872 # joined_value += sep + '; '.join(map(str, val))
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
873 joined_value += sep + val
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
874 sep = "; "
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
875 # joined_value is a string
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
876 result[key] = joined_value
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
877
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
878 newstring = "; ".join(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
879 [", ".join(prez) for prez in result[PHOSPHORESIDUE]]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
880 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
881 # #separate the isoforms in PHOSPHORESIDUE column with ";"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
882 # oldstring = result[PHOSPHORESIDUE]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
883 # oldlist = list(oldstring)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
884 # newstring = ""
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
885 # i = 0
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
886 # for e in oldlist:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
887 # if e == ";":
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
888 # if numps > 1:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
889 # if i%numps:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
890 # newstring = newstring + ";"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
891 # else:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
892 # newstring = newstring + ","
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
893 # else:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
894 # newstring = newstring + ";"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
895 # i +=1
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
896 # else:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
897 # newstring = newstring + e
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
898 result[PHOSPHORESIDUE] = newstring
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
899
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
900 # separate sequence7's by |
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
901 oldstring = result[SEQUENCE7]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
902 oldlist = oldstring
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
903 newstring = ""
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
904 for ol in oldlist:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
905 for e in ol:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
906 if e == ";":
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
907 newstring = newstring + " |"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
908 elif len(newstring) > 0 and 1 > newstring.count(e):
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
909 newstring = newstring + " | " + e
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
910 elif 1 > newstring.count(e):
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
911 newstring = newstring + e
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
912 result[SEQUENCE7] = newstring
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
913
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
914 return [phospho_pep, result]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
915
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
916 # Construct list of [string, dictionary] lists
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
917 # where the dictionary provides the SwissProt metadata for a phosphopeptide
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
918 result_list = [
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
919 catch(pseq_to_subdict, psequence)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
920 for psequence in data_in[PHOSPHOPEPTIDE_MATCH]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
921 ]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
922
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
923 end_time = time.process_time() # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
924 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
925 "%0.6f added SwissProt annotations to phosphopeptides [B]"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
926 % (end_time - start_time,),
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
927 file=sys.stderr,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
928 ) # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
929
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
930 # Construct dictionary from list of lists
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
931 # ref: https://www.8bitavenue.com/how-to-convert-list-of-lists-to-dictionary-in-python/
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
932 UniProt_Info = {
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
933 result[0]: result[1]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
934 for result in result_list
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
935 if result is not None
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
936 }
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
937
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
938 end_time = time.process_time() # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
939 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
940 "%0.6f create dictionary mapping phosphopeptide to metadata dictionary [C]"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
941 % (end_time - start_time,),
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
942 file=sys.stderr,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
943 ) # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
944
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
945 # cosmetic: add N_A to phosphopeptide rows with no hits
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
946 p_peptide_list = []
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
947 for key in UniProt_Info:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
948 p_peptide_list.append(key)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
949 for nestedKey in UniProt_Info[key]:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
950 if UniProt_Info[key][nestedKey] == "":
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
951 UniProt_Info[key][nestedKey] = N_A
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
952
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
953 end_time = time.process_time() # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
954 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
955 "%0.6f performed cosmetic clean-up [D]" % (end_time - start_time,),
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
956 file=sys.stderr,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
957 ) # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
958
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
959 # convert UniProt_Info dictionary to dataframe
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
960 uniprot_df = pandas.DataFrame.transpose(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
961 pandas.DataFrame.from_dict(UniProt_Info)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
962 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
963
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
964 # reorder columns to match expected output file
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
965 uniprot_df[
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
966 PHOSPHOPEPTIDE
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
967 ] = uniprot_df.index # make index a column too
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
968
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
969 cols = uniprot_df.columns.tolist()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
970 # cols = [cols[-1]]+cols[4:6]+[cols[1]]+[cols[2]]+[cols[6]]+[cols[0]]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
971 # uniprot_df = uniprot_df[cols]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
972 uniprot_df = uniprot_df[
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
973 [
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
974 PHOSPHOPEPTIDE,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
975 SEQUENCE10,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
976 SEQUENCE7,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
977 GENE_NAME,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
978 PHOSPHORESIDUE,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
979 UNIPROT_ID,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
980 DESCRIPTION,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
981 ]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
982 ]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
983
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
984 end_time = time.process_time() # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
985 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
986 "%0.6f reordered columns to match expected output file [1]"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
987 % (end_time - start_time,),
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
988 file=sys.stderr,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
989 ) # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
990
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
991 # concat to split then groupby to collapse
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
992 seq7_df = pandas.concat(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
993 [
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
994 pandas.Series(row[PHOSPHOPEPTIDE], row[SEQUENCE7].split(" | "))
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
995 for _, row in uniprot_df.iterrows()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
996 ]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
997 ).reset_index()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
998 seq7_df.columns = [SEQUENCE7, PHOSPHOPEPTIDE]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
999
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1000 # --- -------------- begin read PSP_Regulatory_sites ---------------------------------
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1001 # read in PhosphoSitePlus Regulatory Sites dataset
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1002 # ----------- Get PhosphoSitePlus Regulatory Sites data from SQLite database (start) -----------
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1003 conn = sql.connect(uniprot_sqlite)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1004 regsites_df = pandas.read_sql_query(PSP_REGSITE_SQL, conn)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1005 # Close SwissProt SQLite database
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1006 conn.close()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1007 # ... -------------- end read PSP_Regulatory_sites ------------------------------------
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1008
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1009 # keep only the entries for the requested species (when one is specified) in dataframe
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1010 if len(species) > 0:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1011 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1012 'Limit PhosphoSitesPlus records to species "' + species + '"'
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1013 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1014 regsites_df = regsites_df[regsites_df.ORGANISM == species]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1015
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1016 # merge the seq7 df with the regsites df based off of the sequence7
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1017 merge_df = seq7_df.merge(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1018 regsites_df,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1019 left_on=SEQUENCE7,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1020 right_on=SITE_PLUSMINUS_7AA_SQL,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1021 how="left",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1022 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1023
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1024 # after merging df, select only the columns of interest - note that PROTEIN is absent here
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1025 merge_df = merge_df[
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1026 [
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1027 PHOSPHOPEPTIDE,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1028 SEQUENCE7,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1029 ON_FUNCTION,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1030 ON_PROCESS,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1031 ON_PROT_INTERACT,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1032 ON_OTHER_INTERACT,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1033 ON_NOTES,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1034 ]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1035 ]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1036 # combine column values of interest into one FUNCTION_PHOSPHORESIDUE column
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1037 merge_df[FUNCTION_PHOSPHORESIDUE] = merge_df[ON_FUNCTION].str.cat(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1038 merge_df[ON_PROCESS], sep="; ", na_rep=""
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1039 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1040 merge_df[FUNCTION_PHOSPHORESIDUE] = merge_df[
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1041 FUNCTION_PHOSPHORESIDUE
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1042 ].str.cat(merge_df[ON_PROT_INTERACT], sep="; ", na_rep="")
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1043 merge_df[FUNCTION_PHOSPHORESIDUE] = merge_df[
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1044 FUNCTION_PHOSPHORESIDUE
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1045 ].str.cat(merge_df[ON_OTHER_INTERACT], sep="; ", na_rep="")
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1046 merge_df[FUNCTION_PHOSPHORESIDUE] = merge_df[
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1047 FUNCTION_PHOSPHORESIDUE
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1048 ].str.cat(merge_df[ON_NOTES], sep="; ", na_rep="")
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1049
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1050 # remove the columns that were combined
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1051 merge_df = merge_df[
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1052 [PHOSPHOPEPTIDE, SEQUENCE7, FUNCTION_PHOSPHORESIDUE]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1053 ]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1054
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1055 end_time = time.process_time() # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1056 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1057 "%0.6f merge regsite metadata [1a]" % (end_time - start_time,),
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1058 file=sys.stderr,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1059 ) # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1060
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1061 # cosmetic changes to Function Phosphoresidue column
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1062 fp_series = pandas.Series(merge_df[FUNCTION_PHOSPHORESIDUE])
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1063
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1064 end_time = time.process_time() # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1065 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1066 "%0.6f more cosmetic changes [1b]" % (end_time - start_time,),
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1067 file=sys.stderr,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1068 ) # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1069
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1070 i = 0
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1071 while i < len(fp_series):
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1072 # remove the extra ";" so that it looks more professional
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1073 if fp_series[i] == "; ; ; ; ": # remove ; from empty hits
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1074 fp_series[i] = ""
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1075 while fp_series[i].endswith("; "): # remove ; from the ends
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1076 fp_series[i] = fp_series[i][:-2]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1077 while fp_series[i].startswith("; "): # remove ; from the beginning
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1078 fp_series[i] = fp_series[i][2:]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1079 fp_series[i] = fp_series[i].replace("; ; ; ; ", "; ")
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1080 fp_series[i] = fp_series[i].replace("; ; ; ", "; ")
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1081 fp_series[i] = fp_series[i].replace("; ; ", "; ")
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1082
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1083 # turn blanks into N_A to signify the info was searched for but cannot be found
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1084 if fp_series[i] == "":
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1085 fp_series[i] = N_A
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1086
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1087 i += 1
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1088 merge_df[FUNCTION_PHOSPHORESIDUE] = fp_series
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1089
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1090 end_time = time.process_time() # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1091 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1092 "%0.6f cleaned up semicolons [1c]" % (end_time - start_time,),
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1093 file=sys.stderr,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1094 ) # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1095
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1096 # merge uniprot df with merge df
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1097 uniprot_regsites_merged_df = uniprot_df.merge(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1098 merge_df,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1099 left_on=PHOSPHOPEPTIDE,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1100 right_on=PHOSPHOPEPTIDE,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1101 how="left",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1102 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1103
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1104 # collapse the merged df
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1105 uniprot_regsites_collapsed_df = pandas.DataFrame(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1106 uniprot_regsites_merged_df.groupby(PHOSPHOPEPTIDE)[
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1107 FUNCTION_PHOSPHORESIDUE
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1108 ].apply(lambda x: ppep_join(x))
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1109 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1110 # .apply(lambda x: "%s" % ' | '.join(x)))
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1111
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1112 end_time = time.process_time() # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1113 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1114 "%0.6f collapsed pandas dataframe [1d]" % (end_time - start_time,),
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1115 file=sys.stderr,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1116 ) # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1117
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1118 uniprot_regsites_collapsed_df[
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1119 PHOSPHOPEPTIDE
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1120 ] = (
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1121 uniprot_regsites_collapsed_df.index
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1122 ) # add df index as its own column
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1123
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1124 # rename columns
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1125 uniprot_regsites_collapsed_df.columns = [
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1126 FUNCTION_PHOSPHORESIDUE,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1127 "ppp",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1128 ]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1129
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1130 end_time = time.process_time() # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1131 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1132 "%0.6f selected columns to be merged to uniprot_df [1e]"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1133 % (end_time - start_time,),
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1134 file=sys.stderr,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1135 ) # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1136
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1137 # add columns based on Sequence7 matching site_+/-7_AA
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1138 uniprot_regsite_df = pandas.merge(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1139 left=uniprot_df,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1140 right=uniprot_regsites_collapsed_df,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1141 how="left",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1142 left_on=PHOSPHOPEPTIDE,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1143 right_on="ppp",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1144 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1145
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1146 end_time = time.process_time() # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1147 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1148 "%0.6f added columns based on Sequence7 matching site_+/-7_AA [1f]"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1149 % (end_time - start_time,),
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1150 file=sys.stderr,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1151 ) # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1152
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1153 data_in.rename(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1154 {"Protein description": PHOSPHOPEPTIDE},
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1155 axis="columns",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1156 inplace=True,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1157 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1158
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1159 # data_in.sort_values(PHOSPHOPEPTIDE_MATCH, inplace=True, kind='mergesort')
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1160 res2 = sorted(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1161 data_in[PHOSPHOPEPTIDE_MATCH].tolist(), key=lambda s: s.casefold()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1162 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1163 data_in = data_in.loc[res2]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1164
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1165 end_time = time.process_time() # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1166 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1167 "%0.6f sorting time [1f]" % (end_time - start_time,),
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1168 file=sys.stderr,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1169 ) # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1170
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1171 cols = [old_cols[0]] + old_cols[col_PKCalpha - 1:]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1172 upstream_data = upstream_data[cols]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1173
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1174 end_time = time.process_time() # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1175 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1176 "%0.6f refactored columns for Upstream Map [1g]"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1177 % (end_time - start_time,),
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1178 file=sys.stderr,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1179 ) # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1180
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1181 # #rename upstream columns in new list
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1182 # new_cols = []
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1183 # for name in cols:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1184 # if "_NetworKIN" in name:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1185 # name = name.split("_")[0]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1186 # if " motif" in name:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1187 # name = name.split(" motif")[0]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1188 # if " sequence " in name:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1189 # name = name.split(" sequence")[0]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1190 # if "_Phosida" in name:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1191 # name = name.split("_")[0]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1192 # if "_PhosphoSite" in name:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1193 # name = name.split("_")[0]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1194 # new_cols.append(name)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1195
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1196 # rename upstream columns in new list
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
def col_rename(name):
    """Return ``name`` with its source-specific suffix removed.

    The rules below are applied in order, each one to the result of the
    previous one.  A rule fires when its marker occurs anywhere in the
    current name; the name is then cut at the first occurrence of the
    rule's separator and only the leading part is kept.

    Note that the " sequence " marker requires a trailing space, while
    the cut is made at " sequence" (no trailing space).
    """
    # (marker that triggers the rule, separator at which the name is cut)
    suffix_rules = (
        ("_NetworKIN", "_"),
        (" motif", " motif"),
        (" sequence ", " sequence"),
        ("_Phosida", "_"),
        ("_PhosphoSite", "_"),
    )
    for marker, separator in suffix_rules:
        if marker in name:
            # keep only the text preceding the first separator
            name = name.split(separator, 1)[0]
    return name
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1209
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1210 new_cols = [col_rename(col) for col in cols]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1211 upstream_data.columns = new_cols
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1212
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1213 end_time = time.process_time() # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1214 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1215 "%0.6f renamed columns for Upstream Map [1h_1]"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1216 % (end_time - start_time,),
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1217 file=sys.stderr,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1218 ) # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1219
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1220 # Create upstream_data_cast as a copy of upstream_data
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1221 # but with first column substituted by the phosphopeptide sequence
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1222 upstream_data_cast = upstream_data.copy()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1223 new_cols_cast = new_cols
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1224 new_cols_cast[0] = "p_peptide"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1225 upstream_data_cast.columns = new_cols_cast
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1226 upstream_data_cast["p_peptide"] = upstream_data.index
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1227
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1228 # --- -------------- begin read upstream_data_melt ------------------------------------
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1229 # ----------- Get melted kinase mapping data from SQLite database (start) -----------
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1230 conn = sql.connect(uniprot_sqlite)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1231 upstream_data_melt_df = pandas.read_sql_query(PPEP_MELT_SQL, conn)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1232 # Close SwissProt SQLite database
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1233 conn.close()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1234 upstream_data_melt = upstream_data_melt_df.copy()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1235 upstream_data_melt.columns = ["p_peptide", "characterization", "X"]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1236 upstream_data_melt["characterization"] = [
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1237 col_rename(s) for s in upstream_data_melt["characterization"]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1238 ]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1239
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1240 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1241 "%0.6f upstream_data_melt_df initially has %d rows"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1242 % (end_time - start_time, len(upstream_data_melt.axes[0])),
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1243 file=sys.stderr,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1244 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1245 # ref: https://stackoverflow.com/a/27360130/15509512
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1246 # e.g. df.drop(df[df.score < 50].index, inplace=True)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1247 upstream_data_melt.drop(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1248 upstream_data_melt[upstream_data_melt.X != "X"].index, inplace=True
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1249 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1250 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1251 "%0.6f upstream_data_melt_df pre-dedup has %d rows"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1252 % (end_time - start_time, len(upstream_data_melt.axes[0])),
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1253 file=sys.stderr,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1254 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1255 # ----------- Get melted kinase mapping data from SQLite database (finish) -----------
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1256 # ... -------------- end read upstream_data_melt --------------------------------------
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1257
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1258 end_time = time.process_time() # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1259 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1260 "%0.6f melted and minimized Upstream Map dataframe [1h_2]"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1261 % (end_time - start_time,),
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1262 file=sys.stderr,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1263 ) # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1264 # ... end read upstream_data_melt
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1265
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1266 end_time = time.process_time() # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1267 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1268 "%0.6f indexed melted Upstream Map [1h_2a]"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1269 % (end_time - start_time,),
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1270 file=sys.stderr,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1271 ) # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1272
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1273 upstream_delta_melt_LoL = upstream_data_melt.values.tolist()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1274
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1275 melt_dict = {}
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1276 for key in upstream_map_p_peptide_list:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1277 melt_dict[key] = []
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1278
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1279 for el in upstream_delta_melt_LoL:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1280 (p_peptide, characterization, X) = tuple(el)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1281 if p_peptide in melt_dict:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1282 melt_dict[p_peptide].append(characterization)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1283 else:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1284 exit(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1285 'Phosphopeptide %s not found in ppep_mapping_db: "phopsphopeptides" and "ppep_mapping_db" must both originate from the same run of mqppep_kinase_mapping'
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1286 % (p_peptide)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1287 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1288
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1289 end_time = time.process_time() # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1290 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1291 "%0.6f appended peptide characterizations [1h_2b]"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1292 % (end_time - start_time,),
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1293 file=sys.stderr,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1294 ) # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1295
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1296 # for key in upstream_map_p_peptide_list:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1297 # melt_dict[key] = ' | '.join(melt_dict[key])
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1298
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1299 for key in upstream_map_p_peptide_list:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1300 melt_dict[key] = melt_join(melt_dict[key])
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1301
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1302 end_time = time.process_time() # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1303 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1304 "%0.6f concatenated multiple characterizations [1h_2c]"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1305 % (end_time - start_time,),
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1306 file=sys.stderr,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1307 ) # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1308
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1309 # map_dict is a dictionary of dictionaries
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1310 map_dict = {}
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1311 for key in upstream_map_p_peptide_list:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1312 map_dict[key] = {}
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1313 map_dict[key][PUTATIVE_UPSTREAM_DOMAINS] = melt_dict[key]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1314
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1315 end_time = time.process_time() # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1316 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1317 "%0.6f instantiated map dictionary [2]" % (end_time - start_time,),
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1318 file=sys.stderr,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1319 ) # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1320
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1321 # convert map_dict to dataframe
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1322 map_df = pandas.DataFrame.transpose(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1323 pandas.DataFrame.from_dict(map_dict)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1324 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1325 map_df["p-peptide"] = map_df.index # make index a column too
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1326 cols_map_df = map_df.columns.tolist()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1327 cols_map_df = [cols_map_df[1]] + [cols_map_df[0]]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1328 map_df = map_df[cols_map_df]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1329
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1330 # join map_df to uniprot_regsite_df
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1331 output_df = uniprot_regsite_df.merge(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1332 map_df, how="left", left_on=PHOSPHOPEPTIDE, right_on="p-peptide"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1333 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1334
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1335 output_df = output_df[
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1336 [
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1337 PHOSPHOPEPTIDE,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1338 SEQUENCE10,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1339 SEQUENCE7,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1340 GENE_NAME,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1341 PHOSPHORESIDUE,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1342 UNIPROT_ID,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1343 DESCRIPTION,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1344 FUNCTION_PHOSPHORESIDUE,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1345 PUTATIVE_UPSTREAM_DOMAINS,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1346 ]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1347 ]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1348
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1349 # cols_output_prelim = output_df.columns.tolist()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1350 #
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1351 # print("cols_output_prelim")
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1352 # print(cols_output_prelim)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1353 #
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1354 # cols_output = cols_output_prelim[:8]+[cols_output_prelim[9]]+[cols_output_prelim[10]]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1355 #
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1356 # print("cols_output with p-peptide")
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1357 # print(cols_output)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1358 #
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1359 # cols_output = [col for col in cols_output if not col == "p-peptide"]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1360 #
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1361 # print("cols_output")
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1362 # print(cols_output)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1363 #
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1364 # output_df = output_df[cols_output]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1365
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1366 # join output_df back to quantitative columns in data_in df
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1367 quant_cols = data_in.columns.tolist()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1368 quant_cols = quant_cols[1:]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1369 quant_data = data_in[quant_cols]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1370
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1371 # ----------- Write merge/filter metadata to SQLite database (start) -----------
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1372 # Open output SQLite database
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1373 conn = sql.connect(output_sqlite)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1374 cur = conn.cursor()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1375
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1376 cur.executescript(MRGFLTR_DDL)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1377
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1378 cur.execute(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1379 CITATION_INSERT_STMT,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1380 ("mrgfltr_metadata_view", CITATION_INSERT_PSP),
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1381 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1382 cur.execute(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1383 CITATION_INSERT_STMT, ("mrgfltr_metadata", CITATION_INSERT_PSP)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1384 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1385 cur.execute(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1386 CITATION_INSERT_STMT,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1387 ("mrgfltr_metadata_view", CITATION_INSERT_PSP_REF),
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1388 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1389 cur.execute(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1390 CITATION_INSERT_STMT, ("mrgfltr_metadata", CITATION_INSERT_PSP_REF)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1391 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1392
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1393 # Read ppep-to-sequence LUT
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1394 ppep_lut_df = pandas.read_sql_query(PPEP_ID_SQL, conn)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1395 # write only metadata for merged/filtered records to SQLite
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1396 mrgfltr_metadata_df = output_df.copy()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1397 # replace phosphopeptide seq with ppep.id
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1398 mrgfltr_metadata_df = ppep_lut_df.merge(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1399 mrgfltr_metadata_df,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1400 left_on="ppep_seq",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1401 right_on=PHOSPHOPEPTIDE,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1402 how="inner",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1403 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1404 mrgfltr_metadata_df.drop(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1405 columns=[PHOSPHOPEPTIDE, "ppep_seq"], inplace=True
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1406 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1407 # rename columns
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1408 mrgfltr_metadata_df.columns = MRGFLTR_METADATA_COLUMNS
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1409 mrgfltr_metadata_df.to_sql(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1410 "mrgfltr_metadata",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1411 con=conn,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1412 if_exists="append",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1413 index=False,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1414 method="multi",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1415 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1416
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1417 # Close the output SQLite database
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1418 conn.close()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1419 # ----------- Write merge/filter metadata to SQLite database (finish) -----------
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1420
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1421 output_df = output_df.merge(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1422 quant_data,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1423 how="right",
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1424 left_on=PHOSPHOPEPTIDE,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1425 right_on=PHOSPHOPEPTIDE_MATCH,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1426 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1427 output_cols = output_df.columns.tolist()
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1428 output_cols = output_cols[:-1]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1429 output_df = output_df[output_cols]
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1430
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1431 # cosmetic changes to Upstream column
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1432 output_df[PUTATIVE_UPSTREAM_DOMAINS] = output_df[
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1433 PUTATIVE_UPSTREAM_DOMAINS
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1434 ].fillna(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1435 ""
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1436 ) # fill the NaN with "" for those Phosphopeptides that got a "WARNING: Failed match for " in the upstream mapping
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1437 us_series = pandas.Series(output_df[PUTATIVE_UPSTREAM_DOMAINS])
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1438 i = 0
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1439 while i < len(us_series):
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1440 # turn blanks into N_A to signify the info was searched for but cannot be found
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1441 if us_series[i] == "":
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1442 us_series[i] = N_A
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1443 i += 1
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1444 output_df[PUTATIVE_UPSTREAM_DOMAINS] = us_series
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1445
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1446 end_time = time.process_time() # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1447 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1448 "%0.6f establisheed output [3]" % (end_time - start_time,),
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1449 file=sys.stderr,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1450 ) # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1451
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1452 (output_rows, output_cols) = output_df.shape
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1453
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1454 output_df = output_df.convert_dtypes(convert_integer=True)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1455
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1456 # Output onto Final CSV file
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1457 output_df.to_csv(output_filename_csv, index=False)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1458 output_df.to_csv(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1459 output_filename_tab, quoting=None, sep="\t", index=False
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1460 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1461
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1462 end_time = time.process_time() # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1463 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1464 "%0.6f wrote output [4]" % (end_time - start_time,),
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1465 file=sys.stderr,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1466 ) # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1467
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1468 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1469 "{:>10} phosphopeptides written to output".format(str(output_rows))
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1470 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1471
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1472 end_time = time.process_time() # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1473 print(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1474 "%0.6f seconds of non-system CPU time were consumed"
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1475 % (end_time - start_time,),
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1476 file=sys.stderr,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1477 ) # timer
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1478
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1479 # Rev. 7/1/2016
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1480 # Rev. 7/3/2016: replace NaN values in the Upstream column with N/A
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1481 # Rev. 7/3/2016: renamed Upstream column to PUTATIVE_UPSTREAM_DOMAINS
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1482 # Rev. 12/2/2021: Converted to Python from ipynb; use fast Aho-Corasick searching; \
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1483 # read from SwissProt SQLite database
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1484 # Rev. 12/9/2021: Transfer code to Galaxy tool wrapper
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1485
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1486 #
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1487 # copied from Excel Output Script.ipynb END #
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1488 #
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1489
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1490 try:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1491 catch(
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1492 mqpep_getswissprot,
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1493 )
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1494 exit(0)
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1495 except Exception as e:
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1496 exit("Internal error running mqpep_getswissprot(): %s" % (e))
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1497
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1498
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1499 if __name__ == "__main__":
d4d531006735 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents: 0
diff changeset
1500 __main__()