annotate search_ppep.py @ 26:5b8e15b2a67c draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
author eschen42
date Wed, 26 Oct 2022 23:48:51 +0000
parents 3911581e639a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
23
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
1 #!/usr/bin/env python
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
2 # Search and memoize phosphopeptides in Swiss-Prot SQLite table UniProtKB
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
3
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
4 import argparse
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
5 import os.path
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
6 import re
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
7 import sqlite3
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
8 import sys # import the sys module for exc_info
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
9 import time
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
10 import traceback # import the traceback module for format_exception
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
11 from codecs import getreader as cx_getreader
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
12
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
13 # For Aho-Corasick search for fixed set of substrings
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
14 # - add_word
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
15 # - make_automaton
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
16 # - iter
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
17 import ahocorasick
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
18
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
19
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
20 # ref: https://stackoverflow.com/a/8915613/15509512
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
21 # answers: "How to handle exceptions in a list comprehensions"
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
22 # usage:
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
23 # from math import log
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
24 # eggs = [1,3,0,3,2]
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
25 # print([x for x in [catch(log, egg) for egg in eggs] if x is not None])
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
26 # producing:
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
27 # for <built-in function log>
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
28 # with args (0,)
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
29 # exception: math domain error
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
30 # [0.0, 1.0986122886681098, 1.0986122886681098, 0.6931471805599453]
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
def catch(func, *args, handle=lambda e: e, **kwargs):
    """Call ``func(*args, **kwargs)``, reporting any exception instead of raising.

    Intended for use inside comprehensions, where a failing element should
    be reported and skipped rather than abort the whole expression, e.g.::

        [x for x in [catch(log, egg) for egg in eggs] if x is not None]

    Args:
        func: Callable to invoke.
        *args: Positional arguments forwarded to ``func``.
        handle: Accepted for backward compatibility only; it is never
            invoked (the former ``handle(e)`` result was replaced by
            ``None``).
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns, or ``None`` when ``func`` raises an
        ``Exception``; callers therefore cannot distinguish a failure from
        a legitimate ``None`` result.
    """
    try:
        return func(*args, **kwargs)
    except Exception as e:
        # Diagnostics go to stdout via print().
        print("For %s" % str(func))
        print(" with args %s" % str(args))
        print(" caught exception: %s" % str(e))
        (ty, va, tb) = sys.exc_info()
        # format_exception returns a list of strings; it is printed as the
        # repr of that list, so the whole trace stays on one output line.
        trace = traceback.format_exception(ty, va, tb)
        print(" stack trace: " + str(trace))
        return None  # was handle(e)
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
43
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
44
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
45 def __main__():
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
46
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
47 DROP_TABLES_SQL = """
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
48 DROP VIEW IF EXISTS ppep_gene_site_view;
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
49 DROP VIEW IF EXISTS uniprot_view;
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
50 DROP VIEW IF EXISTS uniprotkb_pep_ppep_view;
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
51 DROP VIEW IF EXISTS ppep_intensity_view;
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
52 DROP VIEW IF EXISTS ppep_metadata_view;
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
53
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
54 DROP TABLE IF EXISTS sample;
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
55 DROP TABLE IF EXISTS ppep;
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
56 DROP TABLE IF EXISTS site_type;
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
57 DROP TABLE IF EXISTS deppep_UniProtKB;
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
58 DROP TABLE IF EXISTS deppep;
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
59 DROP TABLE IF EXISTS ppep_gene_site;
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
60 DROP TABLE IF EXISTS ppep_metadata;
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
61 DROP TABLE IF EXISTS ppep_intensity;
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
62 """
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
63
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
64 CREATE_TABLES_SQL = """
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
65 CREATE TABLE deppep
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
66 ( id INTEGER PRIMARY KEY
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
67 , seq TEXT UNIQUE ON CONFLICT IGNORE
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
68 )
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
69 ;
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
70 CREATE TABLE deppep_UniProtKB
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
71 ( deppep_id INTEGER REFERENCES deppep(id) ON DELETE CASCADE
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
72 , UniProtKB_id TEXT REFERENCES UniProtKB(id) ON DELETE CASCADE
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
73 , pos_start INTEGER
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
74 , pos_end INTEGER
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
75 , PRIMARY KEY (deppep_id, UniProtKB_id, pos_start, pos_end)
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
76 ON CONFLICT IGNORE
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
77 )
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
78 ;
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
79 CREATE TABLE ppep
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
80 ( id INTEGER PRIMARY KEY
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
81 , deppep_id INTEGER REFERENCES deppep(id) ON DELETE CASCADE
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
82 , seq TEXT UNIQUE ON CONFLICT IGNORE
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
83 , scrubbed TEXT
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
84 );
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
85 CREATE TABLE site_type
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
86 ( id INTEGER PRIMARY KEY
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
87 , type_name TEXT UNIQUE ON CONFLICT IGNORE
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
88 );
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
89 CREATE INDEX idx_ppep_scrubbed on ppep(scrubbed)
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
90 ;
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
91 CREATE TABLE sample
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
92 ( id INTEGER PRIMARY KEY
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
93 , name TEXT UNIQUE ON CONFLICT IGNORE
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
94 )
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
95 ;
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
96 CREATE VIEW uniprot_view AS
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
97 SELECT DISTINCT
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
98 Uniprot_ID
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
99 , Description
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
100 , Organism_Name
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
101 , Organism_ID
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
102 , Gene_Name
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
103 , PE
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
104 , SV
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
105 , Sequence
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
106 , Description ||
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
107 CASE WHEN Organism_Name = 'N/A'
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
108 THEN ''
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
109 ELSE ' OS='|| Organism_Name
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
110 END ||
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
111 CASE WHEN Organism_ID = -1
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
112 THEN ''
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
113 ELSE ' OX='|| Organism_ID
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
114 END ||
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
115 CASE WHEN Gene_Name = 'N/A'
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
116 THEN ''
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
117 ELSE ' GN='|| Gene_Name
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
118 END ||
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
119 CASE WHEN PE = 'N/A'
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
120 THEN ''
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
121 ELSE ' PE='|| PE
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
122 END ||
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
123 CASE WHEN SV = 'N/A'
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
124 THEN ''
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
125 ELSE ' SV='|| SV
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
126 END AS long_description
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
127 , Database
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
128 FROM UniProtKB
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
129 ;
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
130 CREATE VIEW uniprotkb_pep_ppep_view AS
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
131 SELECT deppep_UniProtKB.UniprotKB_ID AS accession
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
132 , deppep_UniProtKB.pos_start AS pos_start
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
133 , deppep_UniProtKB.pos_end AS pos_end
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
134 , deppep.seq AS peptide
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
135 , ppep.seq AS phosphopeptide
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
136 , ppep.scrubbed AS scrubbed
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
137 , uniprot_view.Sequence AS sequence
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
138 , uniprot_view.Description AS description
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
139 , uniprot_view.long_description AS long_description
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
140 , ppep.id AS ppep_id
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
141 FROM ppep, deppep, deppep_UniProtKB, uniprot_view
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
142 WHERE deppep.id = ppep.deppep_id
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
143 AND deppep.id = deppep_UniProtKB.deppep_id
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
144 AND deppep_UniProtKB.UniprotKB_ID = uniprot_view.Uniprot_ID
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
145 ORDER BY UniprotKB_ID, deppep.seq, ppep.seq
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
146 ;
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
147 CREATE TABLE ppep_gene_site
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
148 ( ppep_id INTEGER REFERENCES ppep(id)
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
149 , gene_names TEXT
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
150 , site_type_id INTEGER REFERENCES site_type(id)
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
151 , kinase_map TEXT
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
152 , PRIMARY KEY (ppep_id, kinase_map) ON CONFLICT IGNORE
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
153 )
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
154 ;
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
155 CREATE VIEW ppep_gene_site_view AS
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
156 SELECT DISTINCT
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
157 ppep.seq AS phospho_peptide
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
158 , ppep_id
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
159 , gene_names
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
160 , type_name
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
161 , kinase_map
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
162 FROM
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
163 ppep, ppep_gene_site, site_type
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
164 WHERE
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
165 ppep_gene_site.ppep_id = ppep.id
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
166 AND
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
167 ppep_gene_site.site_type_id = site_type.id
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
168 ORDER BY
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
169 ppep.seq
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
170 ;
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
171 CREATE TABLE ppep_metadata
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
172 ( ppep_id INTEGER REFERENCES ppep(id)
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
173 , protein_description TEXT
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
174 , gene_name TEXT
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
175 , FASTA_name TEXT
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
176 , phospho_sites TEXT
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
177 , motifs_unique TEXT
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
178 , accessions TEXT
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
179 , motifs_all_members TEXT
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
180 , domain TEXT
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
181 , ON_FUNCTION TEXT
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
182 , ON_PROCESS TEXT
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
183 , ON_PROT_INTERACT TEXT
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
184 , ON_OTHER_INTERACT TEXT
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
185 , notes TEXT
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
186 , PRIMARY KEY (ppep_id) ON CONFLICT IGNORE
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
187 )
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
188 ;
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
189 CREATE VIEW ppep_metadata_view AS
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
190 SELECT DISTINCT
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
191 ppep.seq AS phospho_peptide
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
192 , protein_description
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
193 , gene_name
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
194 , FASTA_name
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
195 , phospho_sites
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
196 , motifs_unique
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
197 , accessions
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
198 , motifs_all_members
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
199 , domain
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
200 , ON_FUNCTION
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
201 , ON_PROCESS
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
202 , ON_PROT_INTERACT
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
203 , ON_OTHER_INTERACT
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
204 , notes
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
205 FROM
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
206 ppep, ppep_metadata
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
207 WHERE
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
208 ppep_metadata.ppep_id = ppep.id
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
209 ORDER BY
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
210 ppep.seq
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
211 ;
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
212 CREATE TABLE ppep_intensity
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
213 ( ppep_id INTEGER REFERENCES ppep(id)
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
214 , sample_id INTEGER
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
215 , intensity INTEGER
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
216 , PRIMARY KEY (ppep_id, sample_id) ON CONFLICT IGNORE
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
217 )
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
218 ;
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
219 CREATE VIEW ppep_intensity_view AS
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
220 SELECT DISTINCT
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
221 ppep.seq AS phospho_peptide
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
222 , sample.name AS sample
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
223 , intensity
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
224 FROM
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
225 ppep, sample, ppep_intensity
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
226 WHERE
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
227 ppep_intensity.sample_id = sample.id
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
228 AND
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
229 ppep_intensity.ppep_id = ppep.id
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
230 ;
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
231 """
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
232
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
233 UNIPROT_SEQ_AND_ID_SQL = """
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
234 select Sequence, Uniprot_ID
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
235 from UniProtKB
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
236 """
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
237
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
238 # Parse Command Line
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
239 parser = argparse.ArgumentParser(
26
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
240 description=" ".join([
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
241 "Phosphoproteomic Enrichment",
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
242 "phosphopeptide SwissProt search (in place in SQLite DB)."
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
243 ])
23
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
244 )
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
245
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
246 # inputs:
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
247 # Phosphopeptide data for experimental results, including the intensities
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
248 # and the mapping to kinase domains, in tabular format.
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
249 parser.add_argument(
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
250 "--phosphopeptides",
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
251 "-p",
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
252 nargs=1,
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
253 required=True,
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
254 dest="phosphopeptides",
26
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
255 help=" ".join([
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
256 "Phosphopeptide data for experimental results,",
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
257 "generated by the Phosphoproteomic Enrichment Localization",
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
258 "Filter tool"
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
259 ]),
23
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
260 )
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
261 parser.add_argument(
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
262 "--uniprotkb",
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
263 "-u",
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
264 nargs=1,
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
265 required=True,
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
266 dest="uniprotkb",
26
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
267 help=" ".join([
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
268 "UniProtKB/Swiss-Prot data, converted from FASTA format by the",
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
269 "Phosphoproteomic Enrichment Kinase Mapping tool"
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
270 ]),
23
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
271 )
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
272 parser.add_argument(
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
273 "--schema",
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
274 action="store_true",
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
275 dest="db_schema",
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
276 help="show updated database schema",
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
277 )
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
# Register the remaining boolean switches; each one is a plain
# "store_true" flag, so they are declared from a small table.
for flag, dest_name, help_text in (
    (
        "--warn-duplicates",
        "warn_duplicates",
        "show warnings for duplicated sequences",
    ),
    (
        "--verbose",
        "verbose",
        "show somewhat verbose program tracing",
    ),
):
    parser.add_argument(
        flag,
        action="store_true",
        dest=dest_name,
        help=help_text,
    )
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
# Every argument has been declared; parse the command line.
options = parser.parse_args()
if options.verbose:
    print("options: %s\n" % (options,))

# Resolve the phosphopeptide input (a tabular file such as
# "outputfile_STEP2.txt") to an absolute path, or stop with a message.
ppep_arg = options.phosphopeptides
if ppep_arg is None:
    exit('Argument "phosphopeptides" is required but not supplied')
try:
    f_name = os.path.abspath(ppep_arg[0])
except Exception as err:
    exit("Error parsing phosphopeptides argument: %s" % (err))

# Resolve the SQLite input/output file to an absolute path in the same way.
kb_arg = options.uniprotkb
if kb_arg is None:
    exit('Argument "uniprotkb" is required but not supplied')
try:
    db_name = os.path.abspath(kb_arg[0])
except Exception as err:
    exit("Error parsing uniprotkb argument: %s" % (err))
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
310
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
311 # print("options.schema is %d" % options.db_schema)
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
312
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
313 # db_name = "demo/test.sqlite"
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
314 # f_name = "demo/test_input.txt"
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
315
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
# Open the database and take two independent cursors on the connection.
con = sqlite3.connect(db_name)
cur, ker = con.cursor(), con.cursor()

# Start from a clean slate: drop whatever this run is about to create ...
cur.executescript(DROP_TABLES_SQL)

# ... and then (re)create it.
cur.executescript(CREATE_TABLES_SQL)

# On request, dump the SQL text of every schema entry that has one.
if options.db_schema:
    print("\nAfter creating tables/views that are to be created, schema is:")
    for schema_row in cur.execute("SELECT * FROM sqlite_schema"):
        ddl = schema_row[4]
        if ddl is not None:
            print("%s;" % ddl)
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
340
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
def generate_ppep(f):
    """Yield the key (first column) of each data row of a tabular file.

    The first line of the file is treated as a header and is skipped.
    For every following line, the text before the first tab is taken,
    its trailing line terminator (if the line has no tab) is removed,
    all double-quote characters are deleted, and the result is yielded.

    The file is read as bytes and decoded as latin-1 through a codecs
    stream reader;
    ref: https://stackoverflow.com/a/16713581/15509512
    (answer to "Use codecs to read file with correct encoding").

    Args:
        f: path of the tab-separated input file.

    Yields:
        str: the unquoted first field of each line after the header.
    """
    # Function-scope import keeps this block self-contained.
    import codecs

    # The "with" guarantees the handle is released even when the consumer
    # abandons the generator early or an exception interrupts iteration.
    with open(f, "rb") as raw:
        reader = codecs.getreader("latin-1")(raw)
        # Discard the header line (a no-op for an empty file).
        next(reader, None)
        for line in reader:
            first_field = line.partition("\t")[0]
            # A line without any tab still carries its line ending here;
            # strip it so the key never ends in "\r" or "\n".
            first_field = first_field.rstrip("\r\n")
            yield first_field.replace('"', "")
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
365
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
# Build an Aho-Corasick automaton from a trie
# - ref:
#   - https://pypi.org/project/pyahocorasick/
#   - https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm
#   - https://en.wikipedia.org/wiki/Trie
auto = ahocorasick.Automaton()
re_phos = re.compile("p")
# Scrub out unsearchable characters per section
#   "Match the p_peptides to the @sequences array:"
# of the original
#   PhosphoPeptide Upstream Kinase Mapping.pl
# which originally read
#   $tmp_p_peptide =~ s/#//g;
#   $tmp_p_peptide =~ s/\d//g;
#   $tmp_p_peptide =~ s/\_//g;
#   $tmp_p_peptide =~ s/\.//g;
#
# The pattern has to be a character class; without the brackets it only
# matches the literal text "0-9_", then any one character, then "#", so
# digits, underscores, dots and hashes were not removed individually.
re_scrub = re.compile(r"[0-9_.#]")
ppep_count = 0
for ppep in generate_ppep(f_name):
    ppep_count += 1
    scrubbed = re_scrub.sub("", ppep)
    # Removing every "p" gives the dephosphorylated sequence.
    deppep = re_phos.sub("", scrubbed)
    if options.verbose:
        print("deppep: %s; scrubbed: %s" % (deppep, scrubbed))
    # Look the dephosphopeptide up once and keep the fetched row, so the
    # id is never read from an already-consumed result set.
    cur.execute("SELECT id FROM deppep WHERE seq = (?)", (deppep,))
    found = cur.fetchone()
    if found is None:
        # First occurrence: memoize it, read back its id, and add it to
        # the trie together with that id.
        cur.execute("INSERT INTO deppep(seq) VALUES (?)", (deppep,))
        cur.execute("SELECT id FROM deppep WHERE seq = (?)", (deppep,))
        deppep_id = cur.fetchone()[0]
        auto.add_word(deppep, (deppep_id, deppep))
    else:
        deppep_id = found[0]
    # Every phosphopeptide is recorded against its dephosphopeptide.
    cur.execute(
        "INSERT INTO ppep(seq, scrubbed, deppep_id) VALUES (?,?,?)",
        (ppep, scrubbed, deppep_id),
    )
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
408 # def generate_deppep():
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
409 # cur.execute("SELECT seq FROM deppep")
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
410 # for row in cur.fetchall():
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
411 # yield row[0]
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
# Number of distinct dephosphopeptide sequences now stored; count(*)
# always produces exactly one row, so it is read directly.
(deppep_count,) = cur.execute(
    "SELECT count(*) FROM (SELECT seq FROM deppep GROUP BY seq)"
).fetchone()

# Number of distinct sequences in the UniProtKB table.
(sequence_count,) = cur.execute(
    """
    SELECT count(*) FROM (
      SELECT Sequence FROM UniProtKB GROUP BY Sequence
      )
    """
).fetchone()

# Report how much input was read and how it collapsed.
print("%d phosphopeptides were read from input" % ppep_count)
deppep_message = (
    "%d corresponding dephosphopeptides are represented in input"
    % deppep_count
)
print(deppep_message)
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
431 # Look for cases where several UniProtKB rows share both the same Gene_Name and the same Sequence
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
432 cur.execute(
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
433 """
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
434 SELECT Uniprot_ID, Gene_Name, Sequence
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
435 FROM UniProtKB
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
436 WHERE Sequence IN (
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
437 SELECT Sequence
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
438 FROM UniProtKB
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
439 GROUP BY Sequence, Gene_Name
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
440 HAVING count(*) > 1
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
441 )
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
442 ORDER BY Sequence
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
443 """
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
444 )
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
445 duplicate_count = 0
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
446 old_seq = ""
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
447 for row in cur.fetchall():
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
448 if duplicate_count == 0:
26
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
449 print(" ".join([
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
450 "\nEach of the following sequences is associated with several",
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
451 "accession IDs (which are listed in the first column) but",
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
452 "the same gene ID (which is listed in the second column)."
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
453 ]))
23
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
454 if row[2] != old_seq:
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
455 old_seq = row[2]
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
456 duplicate_count += 1
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
457 if options.warn_duplicates:
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
458 print("\n%s\t%s\t%s" % row)
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
459 else:
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
460 if options.warn_duplicates:
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
461 print("%s\t%s" % (row[0], row[1]))
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
462 if duplicate_count > 0:
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
463 print(
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
464 "\n%d sequences have duplicated accession IDs\n" % duplicate_count
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
465 )
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
466
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
467 print("%s accession sequences will be searched\n" % sequence_count)
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
468
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
469 # print(auto.dump())
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
470
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
471 # Convert the trie to an automaton (a finite-state machine)
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
472 auto.make_automaton()
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
473
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
474 # Execute the query for sequences and metadata without fetching the results yet; rows are fetched in batches below
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
475 uniprot_seq_and_id = cur.execute(UNIPROT_SEQ_AND_ID_SQL)
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
476 while 1:
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
477 batch = uniprot_seq_and_id.fetchmany(size=50)
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
478 if not batch:
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
479 break
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
480 for Sequence, UniProtKB_id in batch:
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
481 if Sequence is not None:
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
482 for end_index, (insert_order, original_value) in auto.iter(
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
483 Sequence
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
484 ):
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
485 ker.execute(
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
486 """
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
487 INSERT INTO deppep_UniProtKB
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
488 (deppep_id,UniProtKB_id,pos_start,pos_end)
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
489 VALUES (?,?,?,?)
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
490 """,
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
491 (
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
492 insert_order,
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
493 UniProtKB_id,
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
494 1 + end_index - len(original_value),
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
495 end_index,
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
496 ),
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
497 )
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
498 else:
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
499 raise ValueError(
26
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
500 "UniProtKB_id %s, but Sequence is None: %s %s"
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
501 % (
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
502 UniProtKB_id,
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
503 "Check whether SwissProt file is missing",
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
504 "the sequence for this ID")
23
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
505 )
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
506 ker.execute(
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
507 """
26
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
508 SELECT
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
509 count(*) ||
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
510 ' accession-peptide-phosphopeptide combinations were found'
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
511 FROM
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
512 uniprotkb_pep_ppep_view
23
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
513 """
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
514 )
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
515 for row in ker.fetchall():
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
516 print(row[0])
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
517
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
518 ker.execute(
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
519 """
26
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
520 SELECT
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
521 count(*) || ' accession matches were found',
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
522 count(*) AS accession_count
23
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
523 FROM (
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
524 SELECT accession
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
525 FROM uniprotkb_pep_ppep_view
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
526 GROUP BY accession
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
527 )
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
528 """
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
529 )
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
530 for row in ker.fetchall():
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
531 print(row[0])
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
532
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
533 ker.execute(
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
534 """
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
535 SELECT count(*) || ' peptide matches were found'
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
536 FROM (
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
537 SELECT peptide
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
538 FROM uniprotkb_pep_ppep_view
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
539 GROUP BY peptide
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
540 )
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
541 """
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
542 )
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
543 for row in ker.fetchall():
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
544 print(row[0])
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
545
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
546 ker.execute(
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
547 """
26
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
548 SELECT
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
549 count(*) || ' phosphopeptide matches were found',
5b8e15b2a67c planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e0b80550743f634282b4b4348b75e6f172dc1488
eschen42
parents: 23
diff changeset
550 count(*) AS phosphopeptide_count
23
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
551 FROM (
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
552 SELECT phosphopeptide
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
553 FROM uniprotkb_pep_ppep_view
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
554 GROUP BY phosphopeptide
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
555 )
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
556 """
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
557 )
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
558 for row in ker.fetchall():
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
559 print(row[0])
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
560
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
561 # Link peptides not found in the sequence database to a dummy sequence record ('No Uniprot_ID')
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
562 ker.execute(
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
563 """
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
564 INSERT INTO deppep_UniProtKB(deppep_id,UniProtKB_id,pos_start,pos_end)
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
565 SELECT id, 'No Uniprot_ID', 0, 0
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
566 FROM deppep
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
567 WHERE id NOT IN (SELECT deppep_id FROM deppep_UniProtKB)
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
568 """
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
569 )
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
570
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
571 con.commit()
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
572 ker.execute("vacuum")
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
573 con.close()
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
574
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
575
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
if __name__ == "__main__":
    # Script entry point: run the whole search via __main__() and report
    # how long it took, measured with the monotonic perf_counter clock.
    wrap_start_time = time.perf_counter()
    __main__()
    wrap_stop_time = time.perf_counter()

    # perf_counter() returns fractional seconds; scale to milliseconds.
    # The %d conversion truncates the float to a whole number.
    elapsed_ms = (wrap_stop_time - wrap_start_time) * 1000
    report_template = "\nThe matching process took %d milliseconds to run.\n"
    print(report_template % elapsed_ms)
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
586
3911581e639a planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff changeset
587 # vim: sw=4 ts=4 et ai :