Mercurial > repos > galaxyp > mzsqlite_psm_align
annotate mzsqlite_psm_align.py @ 1:4f8cf8fbef57 draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit b0c57cac4e558d974a16b14d4498cf8d4ba9e0c7
author | galaxyp |
---|---|
date | Thu, 19 Apr 2018 14:30:28 -0400 |
parents | f2dc9805107a |
children |
rev | line source |
---|---|
0
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
1 #!/usr/bin/env python |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
2 """ |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
3 # |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
4 #------------------------------------------------------------------------------ |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
5 # University of Minnesota |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
6 # Copyright 2017, Regents of the University of Minnesota |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
7 #------------------------------------------------------------------------------ |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
8 # Author: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
9 # |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
10 # James E Johnson |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
11 # |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
12 #------------------------------------------------------------------------------ |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
13 """ |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
14 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
15 from __future__ import print_function |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
16 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
17 import argparse |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
18 import re |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
19 import sys |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
20 import sqlite3 as sqlite |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
21 from time import time |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
22 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
23 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
24 from Bio.Seq import reverse_complement, translate |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
25 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
26 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
27 import pysam |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
28 from twobitreader import TwoBitFile |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
29 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
30 from profmt import PROBAM_DEFAULTS,ProBAM,ProBAMEntry,ProBED,ProBEDEntry |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
31 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
32 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
def regex_match(expr, item):
    """Report whether ``item`` matches the regular expression ``expr`` at its start.

    This function is registered with SQLite as ``re_match(expr, item)``, so
    either argument may arrive as ``None`` when the SQL value is NULL.

    :param expr: regular expression pattern, or None
    :param item: string to test, or None
    :return: True/False for the match result; None (SQL NULL) when either
             argument is None
    """
    # re.match() raises TypeError on None, which would abort the whole SQL
    # statement; propagate NULL instead, as SQLite's own operators do.
    if expr is None or item is None:
        return None
    return re.match(expr, item) is not None
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
35 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
36 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
def regex_search(expr, item):
    """Report whether the regular expression ``expr`` matches anywhere in ``item``.

    This function is registered with SQLite as ``re_search(expr, item)``, so
    either argument may arrive as ``None`` when the SQL value is NULL.

    :param expr: regular expression pattern, or None
    :param item: string to scan, or None
    :return: True/False for the search result; None (SQL NULL) when either
             argument is None
    """
    # re.search() raises TypeError on None, which would abort the whole SQL
    # statement; propagate NULL instead, as SQLite's own operators do.
    if expr is None or item is None:
        return None
    return re.search(expr, item) is not None
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
39 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
40 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
def regex_sub(expr, replace, item):
    """Return ``item`` with every match of ``expr`` replaced by ``replace``.

    This function is registered with SQLite as ``re_sub(expr, replace, item)``,
    so any argument may arrive as ``None`` when the SQL value is NULL.

    :param expr: regular expression pattern, or None
    :param replace: replacement text (``re.sub`` syntax), or None
    :param item: string to rewrite, or None
    :return: the substituted string; None (SQL NULL) when any argument is None
    """
    # re.sub() raises TypeError on None, which would abort the whole SQL
    # statement; propagate NULL instead, as SQLite's own functions do.
    if expr is None or replace is None or item is None:
        return None
    return re.sub(expr, replace, item)
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
43 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
44 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
def get_connection(sqlitedb_path, addfunctions=True):
    """Open the SQLite database at ``sqlitedb_path`` and return the connection.

    :param sqlitedb_path: path handed to ``sqlite.connect``
    :param addfunctions: when true, register the module's regex helpers on
        the connection as the SQL functions ``re_match`` and ``re_search``
        (two arguments each) and ``re_sub`` (three arguments)
    :return: the open connection
    """
    connection = sqlite.connect(sqlitedb_path)
    if not addfunctions:
        return connection
    # (SQL name, argument count, Python callable)
    sql_functions = (
        ("re_match", 2, regex_match),
        ("re_search", 2, regex_search),
        ("re_sub", 3, regex_sub),
    )
    for sql_name, n_args, func in sql_functions:
        connection.create_function(sql_name, n_args, func)
    return connection
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
52 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
53 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
## Peptide Spectral Match (PSM)s
# Takes no bound parameters.  Each spectrum_identification_results row (sr)
# is joined to its result items (si), each item to the peptide_evidence rows
# (pe) sharing its peptide_ref, and each of those to its peptides row (pep).
# Only evidence with isDecoy = 'false' is kept; rows are ordered by spectrum
# title, then rank.
# Result columns, in order: dBSequence_ref, start, end, pre, post, sequence,
# sr.id, spectrumTitle, rank, chargeState, calculatedMassToCharge,
# experimentalMassToCharge, peptide_ref.
PSM_QUERY = """\
SELECT
pe.dBSequence_ref,
pe.start,
pe.end,
pe.pre,
pe.post,
pep.sequence,
sr.id,
sr.spectrumTitle,
si.rank,
si.chargeState,
si.calculatedMassToCharge,
si.experimentalMassToCharge,
si.peptide_ref
FROM spectrum_identification_results sr
JOIN spectrum_identification_result_items si ON si.spectrum_identification_result_ref = sr.id
JOIN peptide_evidence pe ON si.peptide_ref = pe.peptide_ref
JOIN peptides pep ON pe.peptide_ref = pep.id
WHERE pe.isDecoy = 'false'
ORDER BY sr.spectrumTitle,si.rank
"""
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
77 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
78 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
## Peptide Post Translational Modifications
# Bound parameter: :peptide_ref.
# Returns the peptide_modifications rows for that peptide as
# (location, residue, name, modType, unimod), ordered by location, modType,
# name.  The "unimod" column is a constant empty string, not a table column.
PEP_MODS_QUERY = """\
SELECT location, residue, name, modType, '' as "unimod"
FROM peptide_modifications
WHERE peptide_ref = :peptide_ref
ORDER BY location, modType, name
"""
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
86 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
87 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
## number of peptides to which the spectrum maps
## spectrum_identification_results => spectrum_identification_result_items -> peptide_evidence
# Bound parameter: :sr_id (a spectrum_identification_results id).
# Returns a single count of the distinct peptide sequences reached from that
# spectrum result through non-decoy (isDecoy = 'false') peptide evidence.
# Because of the GROUP BY, no row at all is returned when nothing matches.
SPECTRUM_PEPTIDES_QUERY = """\
SELECT count(distinct pep.sequence)
FROM spectrum_identification_results sr
JOIN spectrum_identification_result_items si ON si.spectrum_identification_result_ref = sr.id
JOIN peptide_evidence pe ON si.peptide_ref = pe.peptide_ref
JOIN peptides pep ON pe.peptide_ref = pep.id
WHERE pe.isDecoy = 'false'
AND sr.id = :sr_id
GROUP BY sr.id
"""
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
100 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
101 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
## number of genomic locations to which the peptide sequence maps
## uniqueness of the peptide mapping
## peptides => peptide_evidence -> db_sequence -> location
## proteins_by_peptide
# Bound parameter: :sequence (a peptide sequence string).
# Returns (dBSequence_ref, start, end) for every non-decoy
# (isDecoy = 'false') peptide_evidence row whose peptide has exactly that
# sequence.  Rows are not de-duplicated or ordered by the query.
PEPTIDE_ACC_QUERY = """\
SELECT
pe.dBSequence_ref,
pe.start,
pe.end
FROM peptide_evidence pe
JOIN peptides pep ON pe.peptide_ref = pep.id
WHERE pe.isDecoy = 'false'
AND pep.sequence = :sequence
"""
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
116 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
117 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
# Bound parameters: :acc, :p_start, :p_end.
# Returns the distinct feature_cds_map rows (all columns) whose name equals
# :acc and that satisfy  :p_start < cds_end  AND  :p_end >= cds_start,
# ordered by name, cds_start, cds_end.
# NOTE(review): :p_start/:p_end are presumably the peptide's bounds in CDS
# coordinates; the mixed strict/non-strict comparisons suggest a particular
# open/closed interval convention -- confirm against the caller.
MAP_QUERY = """\
SELECT distinct *
FROM feature_cds_map
WHERE name = :acc
AND :p_start < cds_end
AND :p_end >= cds_start
ORDER BY name,cds_start,cds_end
"""
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
126 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
127 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
# Bound parameters: :acc, :cds_offset.
# For the feature_cds_map row(s) named :acc whose CDS interval contains
# :cds_offset (cds_start <= :cds_offset < cds_end), returns the distinct
# (chrom, pos) pairs, where pos is the offset translated into the row's
# start/end coordinates.
# The distance into the segment is (:cds_offset - cds_start).  On the '+'
# strand it is added to start; on any other strand it is subtracted from end.
# The previous minus-strand expression, "end - :cds_offset - cds_start",
# subtracted cds_start instead of adding it back, giving a wrong position
# for every segment whose cds_start is not 0.
# NOTE(review): assumes start/end are the segment's genomic bounds and that
# the first CDS base of a minus-strand segment maps to "end" -- confirm the
# coordinate convention (an extra -1 may be needed for half-open intervals).
GENOMIC_POS_QUERY = """\
SELECT distinct chrom,
       CASE WHEN strand = '+' THEN start + (:cds_offset - cds_start)
            ELSE end - (:cds_offset - cds_start) END as "pos"
FROM feature_cds_map
WHERE name = :acc
AND :cds_offset >= cds_start
AND :cds_offset < cds_end
"""
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
135 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
136 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
# Base query against a features/relations schema (the --gffutils_sqlite DB).
# Selects child features of type CDS / UTR / transcript that
#   * belong to a 'transcript' feature on :seqid spanning :tstart..:tend, and
#   * overlap the interval :start..:end.
# Each row also reports the integer frame, whether it equals :frame
# ("in_frame"), and the strand-aware distance between the feature's ends and
# :start/:end ("start_pos" / "end_pos").
# The statement deliberately ends inside its ORDER BY clause so the variants
# below can append further sort terms by string concatenation.
FEATURE_QUERY = """\
SELECT distinct seqid,start,end,featuretype,strand,
CAST(frame AS INTEGER) as "frame", CAST(frame AS INTEGER)==:frame as "in_frame",
CASE WHEN :strand = '+' THEN start - :start ELSE end - :end END as "start_pos",
CASE WHEN :strand = '+' THEN end - :end ELSE start - :start END as "end_pos",
parent as "name"
FROM features JOIN relations ON features.id = relations.child
WHERE seqid = :seqid
AND parent in (
SELECT id
FROM features
WHERE seqid = :seqid
AND featuretype = 'transcript'
AND start <= :tstart AND :tend <= end
)
AND :end >= start AND :start <= end
AND featuretype in ('CDS','five_prime_utr','three_prime_utr','transcript')
ORDER BY strand = :strand DESC, featuretype
"""

## Use ORDER BY to rank the best matches first.  Every variant sorts by:
##   strand match, featuretype, contained (feature covers :start..:end),
##   in_frame, then a position-specific tie-breaker, then start DESC, end:
##   only exon    feature length (end - start), longest first
##   first exon   abs(end_pos)
##   middle exon  abs(start_pos), abs(end_pos)
##   last exon    abs(start_pos)

ONLY_EXON_QUERY = FEATURE_QUERY + """,\
start <= :start AND end >= :end DESC,
in_frame DESC, end - start DESC, start DESC, end
"""

FIRST_EXON_QUERY = FEATURE_QUERY + """,\
start <= :start AND end >= :end DESC,
in_frame DESC, abs(end_pos), start DESC, end
"""

MIDDLE_EXON_QUERY = FEATURE_QUERY + """,\
start <= :start AND end >= :end DESC,
in_frame DESC, abs(start_pos), abs(end_pos), start DESC, end
"""

LAST_EXON_QUERY = FEATURE_QUERY + """,\
start <= :start AND end >= :end DESC,
in_frame DESC, abs(start_pos), start DESC, end
"""
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
182 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
183 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
184 def __main__(): |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
185 parser = argparse.ArgumentParser( |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
186 description='Generate proBED and proBAM from mz.sqlite') |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
187 parser.add_argument('mzsqlite', help="mz.sqlite converted from mzIdentML") |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
188 parser.add_argument('genomic_mapping_sqlite', help="genomic_mapping.sqlite with feature_cds_map table") |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
189 parser.add_argument( |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
190 '-R', '--genomeReference', default='Unknown', |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
191 help='Genome reference sequence in 2bit format') |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
192 parser.add_argument( |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
193 '-t', '--twobit', default=None, |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
194 help='Genome reference sequence in 2bit format') |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
195 parser.add_argument( |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
196 '-r', '--reads_bam', default=None, |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
197 help='reads alignment bam path') |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
198 parser.add_argument( |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
199 '-g', '--gffutils_sqlite', default=None, |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
200 help='gffutils GTF sqlite DB') |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
201 parser.add_argument( |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
202 '-B', '--probed', default=None, |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
203 help='proBed path') |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
204 parser.add_argument( |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
205 '-s', '--prosam', default=None, |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
206 help='proSAM path') |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
207 parser.add_argument( |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
208 '-b', '--probam', default=None, |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
209 help='proBAM path') |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
210 parser.add_argument( |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
211 '-l', '--limit', type=int, default=None, |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
212 help='limit numbers of PSMs for testing') |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
213 parser.add_argument('-v', '--verbose', action='store_true', help='Verbose') |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
214 parser.add_argument('-d', '--debug', action='store_true', help='Debug') |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
215 args = parser.parse_args() |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
216 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
def get_sequence(chrom, start, end):
    """Return the reference sequence ``chrom[start:end]`` from the 2bit genome.

    Helper defined inside ``__main__``; ``twobit`` is read from the enclosing
    scope, where it is either a loaded ``TwoBitFile`` or ``None``.

    The contig is looked up under the given name first and then with the
    ``chr`` prefix toggled (``chr1`` <-> ``1``), so annotation and genome
    files that use different naming conventions still match.

    Returns:
        The sliced sequence when a contig matches and
        ``0 <= start < end <= len(contig)``; ``''`` when a 2bit file is
        loaded but no contig/range matches; ``None`` when no 2bit file is
        loaded.
    """
    if not twobit:
        return None
    alternate = chrom[3:] if chrom.startswith('chr') else 'chr%s' % chrom
    for contig in (chrom, alternate):
        # The slice is half-open, so end == len(contig) is a valid request
        # (it reaches the last base); only end > len is out of range.
        if contig in twobit and 0 <= start < end <= len(twobit[contig]):
            return twobit[contig][start:end]
    return ''
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
226 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
227 twobit = TwoBitFile(args.twobit) if args.twobit else None |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
228 samfile = pysam.AlignmentFile(args.reads_bam, "rb" ) if args.reads_bam else None |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
229 seqlens = twobit.sequence_sizes() |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
230 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
231 probed = open(args.probed,'w') if args.probed else sys.stdout |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
232 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
233 gff_cursor = get_connection(args.gffutils_sqlite).cursor() if args.gffutils_sqlite else None |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
234 map_cursor = get_connection(args.genomic_mapping_sqlite).cursor() |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
235 mz_cursor = get_connection(args.mzsqlite).cursor() |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
236 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
237 unmapped_accs = set() |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
238 timings = dict() |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
def add_time(name, elapsed):
    """Add ``elapsed`` to the running total stored under ``name``.

    Helper defined inside ``__main__``; ``timings`` is the dict created in
    the enclosing scope.  A name seen for the first time starts from zero.
    """
    timings[name] = timings.get(name, 0) + elapsed
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
244 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
245 XG_TYPES = ['N','V','W','J','A','M','C','E','B','O','T','R','I','G','D','U','X','*'] |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
246 FT_TYPES = ['CDS','five_prime_utr','three_prime_utr','transcript'] |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
247 def get_exon_features(exons): |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
248 efeatures = None |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
249 transcript_features = dict() |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
250 t_start = min(exons[0][2],exons[-1][2]) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
251 t_end = max(exons[0][3],exons[-1][3]) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
252 if gff_cursor: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
253 ts = time() |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
254 (acc,gc,gs,ge,st,cs,ce) = exons[0] |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
255 ft_params = {"seqid" : str(gc).replace('chr',''), 'strand' : st, 'tstart': t_start, 'tend': t_end} |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
256 efeatures = [None] * len(exons) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
257 for i,exon in enumerate(exons): |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
258 (acc,gc,gs,ge,st,cs,ce) = exon |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
259 fr = cs % 3 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
260 if args.debug: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
261 print('exon:\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s' % (acc,gc,gs,ge,st,cs,ce,fr),file=sys.stderr) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
262 ft_params = {"seqid" : str(gc).replace('chr',''), "start" : gs + 1, "end" : ge, 'strand' : st, 'frame' : fr, 'tstart': t_start, 'tend': t_end} |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
263 if len(exons) == 1: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
264 q = ONLY_EXON_QUERY |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
265 elif i == 0: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
266 q = FIRST_EXON_QUERY |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
267 elif len(exons) - i == 1: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
268 q = LAST_EXON_QUERY |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
269 else: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
270 q = MIDDLE_EXON_QUERY |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
271 features = [f for f in gff_cursor.execute(q,ft_params)] |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
272 transcripts = [] |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
273 efeatures[i] = [] |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
274 for j,f in enumerate(features): |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
275 transcript = f[-1] |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
276 ftype = f[3] |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
277 if ftype == 'transcript': |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
278 if i > 0 or efeatures[i]: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
279 continue |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
280 if i == 0: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
281 if transcript not in transcript_features: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
282 transcript_features[transcript] = [[] for _ in range(len(exons))] |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
283 transcript_features[transcript][i].append(f) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
284 efeatures[i].append(f) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
285 else: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
286 if transcript in transcript_features: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
287 transcript_features[transcript][i].append(f) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
288 efeatures[i].append(f) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
289 else: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
290 del transcript_features[transcript] |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
291 if not efeatures[i]: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
292 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
293 efeatures[i] = transcripts |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
294 for f in efeatures[i]: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
295 (seqid,start,end,featuretype,strand,frame,in_frame,start_offset,end_offset,parent) = f |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
296 if args.debug: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
297 print('feat%d:\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s' % \ |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
298 (i,seqid,start,end,featuretype,strand,frame,str(in_frame) == '1',start_offset,end_offset,parent),file=sys.stderr) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
299 if args.debug: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
300 print('fmap:\t%s' % transcript_features,file=sys.stderr) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
301 return (efeatures,transcript_features) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
302 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
def is_structural_variant(exons):
    """Tell whether the exons of one alignment disagree on location.

    Every exon is a 7-field record.  Field 1 and field 4 of the first exon
    (unpacked below as chromosome and strand) are compared with the same
    fields of each later exon.

    Returns True when any later exon differs from the first one in either
    field, and False otherwise -- including when there are fewer than two
    exons to compare.
    """
    if len(exons) <= 1:
        return False
    (_acc, first_chrom, _gs, _ge, first_strand, _cs, _ce) = exons[0]
    return any(first_chrom != later[1] or first_strand != later[4]
               for later in exons[1:])
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
310 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
# One-letter classification codes; classify_transcript() returns a subset of
# these and classify_peptide() stores one under probam_dict['XG'].
# NOTE(review): presumably the peptide-type values of the proBAM XG tag --
# confirm against the proBAM specification.
XG_TYPES = list('NVWJAMCEBOTRIGDUX*')
# Feature type names that classify_transcript() tests for in a feature row.
FT_TYPES = [
    'CDS',
    'five_prime_utr',
    'three_prime_utr',
    'transcript',
]
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
def classify_transcript(exons,transcript_features,ref_prot,peptide):
    """Return a one-letter classification code for a peptide on a transcript.

    Only the first exon (exons[0]) and the feature rows recorded for it
    (transcript_features[0]) are examined.

    exons               -- sequence of 7-field exon records
                           (acc, chrom, start, end, strand, cds_start, cds_end)
    transcript_features -- per-exon lists of 10-field feature rows; only
                           index 0 is read here
    ref_prot, peptide   -- reference and observed sequences, compared for
                           equality and length

    Result when exactly one feature row exists for the first exon:
      'R'  feature strand differs from the exon strand
      'E' / 'B' / 'I'  feature type is five_prime_utr / three_prime_utr /
                       transcript
      for a CDS feature that fully contains the exon:
        'O'  in_frame is falsy
        'N'  ref_prot equals peptide
        'W'  the two sequences differ in length
        'V'  same length, different content
      'C'  CDS feature that does not contain the exon but passes the
           overlap test below
    Result when several feature rows exist: 'E' if any is a five_prime_utr,
    else 'B' if any is a three_prime_utr, else 'C'.
    In every other case '*' is returned.
    """
    first_exon_features = transcript_features[0]
    (acc,gc,gs,ge,st,cs,ce) = exons[0]
    feature_count = len(first_exon_features)

    if feature_count > 1:
        kinds = [row[3] for row in first_exon_features]
        if 'five_prime_utr' in kinds:
            return 'E'
        if 'three_prime_utr' in kinds:
            return 'B'
        return 'C'
    if feature_count != 1:
        return '*'

    (seqid,start,end,featuretype,strand,frame,in_frame,
     start_offset,end_offset,parent) = first_exon_features[0]
    if strand != st:
        return 'R'
    if featuretype == 'five_prime_utr':
        return 'E'
    if featuretype == 'three_prime_utr':
        return 'B'
    if featuretype == 'transcript':
        return 'I'
    if featuretype != 'CDS':
        return '*'

    if start <= gs and ge <= end:
        # The exon lies entirely inside the CDS feature.
        # NOTE(review): in_frame is tested for truthiness only; confirm it
        # never arrives as a non-empty string such as '0'.
        if not in_frame:
            return 'O'
        if ref_prot == peptide:
            return 'N'
        if len(ref_prot) != len(peptide):
            return 'W'
        return 'V'

    # NOTE(review): `and` binds tighter than `or`, so this reads as
    # (strand is '+' and start <= gs) or (ge > end); confirm that this is
    # the intended overlap rule, in particular for '-' strand features.
    if (strand == '+' and start <= gs) or ge > end:
        return 'C'
    return '*'
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
354 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
def classify_peptide(exons,ref_prot,peptide,pep_cds,probam_dict):
    """Classify a peptide alignment and return its coding sequence.

    Two things happen, in this order:

    1. If the peptide differs from ref_prot and the module-level samfile is
       set, every exon whose residues differ is looked up with
       get_exon_pileup(); for each differing residue get_pep_codon() is asked
       for a codon, and a truthy answer replaces the three bases at that
       position in pep_cds.  Any exception raised during this step is
       reported on stderr and the step is abandoned (best effort); the
       classification below still runs.
    2. probam_dict['XG'] is set: 'G' when is_structural_variant(exons) is
       true, otherwise the first result other than '*' that
       classify_transcript() gives for the transcripts of the first exon's
       features, otherwise '*'.

    exons       -- sequence of 7-field exon records
                   (acc, chrom, start, end, strand, cds_start, cds_end)
    ref_prot    -- reference protein sequence
    peptide     -- observed peptide sequence
    pep_cds     -- coding sequence for the peptide; not modified in place
    probam_dict -- dict updated in place with the 'XG' entry

    Returns pep_cds, with any substituted codons applied.
    """
    if ref_prot != peptide and samfile:
        try:
            if args.debug:
                print('name: %s \nref: %s\npep: %s\n' % (scan_name,ref_prot,peptide), file=sys.stderr)
            ts = time()
            for exon in exons:
                (acc,chrom,start,end,strand,c_start,c_end) = exon
                # Floor division keeps the offsets integral on Python 3 as
                # well as Python 2; with "/" Python 3 yields floats, which
                # cannot be used as slice bounds or indices.
                # NOTE(review): these are base offsets rounded down to a
                # codon boundary yet they slice residue sequences -- confirm
                # that residue offsets (c_start // 3) were not intended.
                a_start = c_start // 3 * 3
                a_end = c_end // 3 * 3
                if ref_prot[a_start:a_end] == peptide[a_start:a_end]:
                    continue
                pileup = get_exon_pileup(chrom,start,end)
                for i, bi in enumerate(range(c_start, c_end)):
                    # bi: base offset in the CDS, ai: residue index,
                    # ao: position of the base inside its codon.
                    ai, ao = divmod(bi, 3)
                    # Visit each codon once: at its first base, or at the
                    # first base of the exon when the exon starts mid-codon.
                    if ao != 0 and i != 0:
                        continue
                    if ref_prot[ai] == peptide[ai]:
                        continue
                    codon = get_pep_codon(pileup, bi - c_start, peptide[ai], ao)
                    if args.debug:
                        print('%d %d %d %s :  %s %s %s' % (bi,ai,ao, peptide[ai], str(pep_cds[:bi]), str(codon), str(pep_cds[bi+3:])), file=sys.stderr)
                    if codon:
                        pep_cds = pep_cds[:bi] + codon + pep_cds[bi+3:]
            te = time()
            add_time('var_cds',te - ts)
        except Exception as e:
            # Deliberately best effort: report the failure and carry on with
            # whatever pep_cds holds at this point.
            print('name: %s \nref: %s\npep: %s\n%s\n' % (scan_name,ref_prot,peptide,e), file=sys.stderr)
    probam_dict['XG'] = '*'
    if is_structural_variant(exons):
        probam_dict['XG'] = 'G'
    else:
        (efeatures,transcript_features) = get_exon_features(exons)
        if efeatures and efeatures[0]:
            for f in efeatures[0]:
                # Field 9 of a feature row names its parent transcript.
                transcript = f[9]
                features = transcript_features[transcript]
                xg = classify_transcript(exons,features,ref_prot,peptide)
                if xg != '*':
                    probam_dict['XG'] = xg
                    break
    return pep_cds
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
394 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
def get_variant_cds(exons,ref_prot,peptide,pep_cds):
    """Patch pep_cds with codons taken from the read pileup where peptide differs from ref_prot.

    Nothing is done (pep_cds is returned untouched) when ref_prot equals
    peptide or when the enclosing-scope ``samfile`` is falsy.

    exons    -- sequence of 7-item records unpacked as
                (acc, chrom, start, end, strand, c_start, c_end)
    ref_prot -- reference translation, indexed per residue
    peptide  -- identified peptide sequence, indexed per residue
    pep_cds  -- nucleotide string; 3-character windows are replaced

    Returns the (possibly modified) pep_cds.  Any exception raised while
    processing is reported on stderr and the pep_cds built so far is returned.
    """
    if ref_prot != peptide and samfile:
        try:
            if args.debug:
                print('name: %s \nref: %s\npep: %s\n' % (scan_name,ref_prot,peptide), file=sys.stderr)
            ts = time()
            for exon in exons:
                (acc,chrom,start,end,strand,c_start,c_end) = exon
                # Floor division keeps these integers on Python 3 as well;
                # true division produced floats that cannot be used as
                # slice bounds, and the resulting TypeError was swallowed
                # by the except clause below.
                a_start = c_start // 3 * 3
                a_end = c_end // 3 * 3
                # NOTE(review): a_start/a_end are nucleotide offsets rounded
                # down to a codon boundary, yet they slice residue strings
                # here -- confirm this is the intended comparison window.
                if ref_prot[a_start:a_end] != peptide[a_start:a_end]:
                    pileup = get_exon_pileup(chrom,start,end)
                    for i, bi in enumerate(range(c_start, c_end)):
                        # ai: residue index, ao: offset of bi inside its codon
                        ai, ao = divmod(bi, 3)
                        # Act once per codon: at its first base, or at the
                        # first base of this exon when the exon starts
                        # part-way through a codon.
                        if ao == 0 or i == 0:
                            if ref_prot[ai] != peptide[ai]:
                                codon = get_pep_codon(pileup, bi - c_start, peptide[ai], ao)
                                if args.debug:
                                    print('%d %d %d %s : %s %s %s' % (bi,ai,ao, peptide[ai], str(pep_cds[:bi]), str(codon), str(pep_cds[bi+3:])), file=sys.stderr)
                                if codon:
                                    pep_cds = pep_cds[:bi] + codon + pep_cds[bi+3:]
            te = time()
            add_time('var_cds',te - ts)
        except Exception as e:
            # Best effort: report the failure and fall through to return
            # whatever pep_cds holds at this point.
            print('name: %s \nref: %s\npep: %s\n%s\n' % (scan_name,ref_prot,peptide,e), file=sys.stderr)
    return pep_cds
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
420 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
def get_mapping(acc,pep_start,pep_end):
    """Return the exon records from MAP_QUERY, trimmed to the peptide's nucleotide span.

    pep_start is shifted down by one and both bounds are multiplied by 3 to
    obtain the nucleotide window (p_start, p_end) passed to MAP_QUERY.

    Each returned item is a list
    [acc, chrom, start, end, strand, c_start, c_end], where start/end are the
    row's coordinates after trimming and c_start/c_end are the running
    offsets of that piece within the collected span.
    """
    began = time()
    p_start = (pep_start - 1) * 3
    p_end = pep_end * 3
    map_params = {"acc" : acc, "p_start" : p_start, "p_end" : p_end}
    if args.debug:
        print('%s' % map_params, file=sys.stderr)
    rows = list(map_cursor.execute(MAP_QUERY,map_params))
    exons = []
    ##        =========       pep
    ##  ---                   continue
    ##      ---               trim
    ##          ---           copy
    ##              ---       trim
    ##                  ---   break
    c_end = 0
    for row in rows:
        (row_acc,chrom,start,end,strand,cds_start,cds_end) = row
        if args.debug:
            print('Prot: %s\t%s:%d-%d\t%s\t%d\t%d' % (row_acc,chrom,start,end,strand,cds_start,cds_end),file=sys.stderr)
        c_start = c_end
        if cds_end < p_start:
            # row lies entirely before the peptide window
            continue
        if cds_start >= p_end:
            # row lies entirely after the window; stop scanning
            break
        # Amount of this row that sticks out on either side of the window.
        lead = p_start - cds_start if cds_start < p_start else 0
        tail = cds_end - p_end if cds_end > p_end else 0
        if strand == '+':
            start += lead
            end -= tail
        else:
            # for any other strand value the trims apply to the opposite ends
            end -= lead
            start += tail
        c_end = c_start + abs(end - start)
        if args.debug:
            print('Pep: %s\t%s:%d-%d\t%s\t%d\t%d' % (row_acc,chrom,start,end,strand,cds_start,cds_end),file=sys.stderr)
        exons.append([row_acc,chrom,start,end,strand,c_start,c_end])
    add_time('get_mapping',time() - began)
    return exons
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
462 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
def get_cds(exons):
    """Concatenate the sequence of every exon record into one string.

    Each record is unpacked as
    (acc, chrom, start, end, strand, cds_start, cds_end).  The sequence is
    fetched with get_sequence() using the smaller coordinate first, and is
    passed through reverse_complement() when strand is '-'.

    Returns the joined string ('' when exons is empty).
    """
    began = time()
    pieces = []
    for record in exons:
        (_acc,chrom,start,end,strand,_cds_start,_cds_end) = record
        fragment = get_sequence(chrom, min(start,end), max(start,end))
        pieces.append(reverse_complement(fragment) if strand == '-' else fragment)
    add_time('get_cds',time() - began)
    if args.debug:
        print('CDS: %s' % str(pieces),file=sys.stderr)
    # joining an empty list already yields ''
    return ''.join(pieces)
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
476 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
def genomic_mapping_count(peptide):
    """Count the distinct genomic locations reported for a peptide sequence.

    PEPTIDE_ACC_QUERY is run with the peptide as ``sequence``; every row is
    unpacked as (acc, pep_start, pep_end).

    Returns:
      -1  when the query yields no rows,
       1  when it yields exactly one row (no genomic lookup is made),
       otherwise the number of distinct 'chrom:pos-chrom:pos' strings built
       from GENOMIC_POS_QUERY for the start and end offsets of each row.

    An accession whose lookup raises is added to the enclosing-scope
    ``unmapped_accs`` set and is skipped from then on.
    """
    ts = time()
    params = {"sequence" : peptide}
    acc_locs = list(mz_cursor.execute(PEPTIDE_ACC_QUERY,params))
    te = time()
    add_time('PEPTIDE_ACC_QUERY',te - ts)
    if acc_locs:
        if len(acc_locs) == 1:
            return 1
        locations = set()
        for acc_loc in acc_locs:
            (acc,pep_start,pep_end) = acc_loc
            if acc in unmapped_accs:
                continue
            try:
                add_time('GENOMIC_POS_QUERY_COUNT',1)
                ts = time()
                # NOTE(review): get_mapping() in this file uses
                # (pep_start - 1) * 3 for the start offset, whereas this uses
                # pep_start * 3 -- confirm which convention
                # GENOMIC_POS_QUERY expects.
                p_start = pep_start * 3
                p_end = pep_end * 3
                params = {"acc" : acc, "cds_offset" : p_start}
                # fetchone() gives None when there is no row; unpacking then
                # raises and the accession is recorded as unmapped below.
                (start_chrom,start_pos) = map_cursor.execute(GENOMIC_POS_QUERY, params).fetchone()
                params = {"acc" : acc, "cds_offset" : p_end}
                (end_chrom,end_pos) = map_cursor.execute(GENOMIC_POS_QUERY, params).fetchone()
                locations.add('%s:%s-%s:%s' % (start_chrom,start_pos,end_chrom,end_pos))
                te = time()
                add_time('GENOMIC_POS_QUERY',te - ts)
            except Exception:
                # Was a bare ``except:``, which also trapped
                # KeyboardInterrupt/SystemExit and misreported them as an
                # unmapped accession.
                unmapped_accs.add(acc)
                if args.debug:
                    print('Unmapped: %s' % acc, file=sys.stderr)
        return len(locations)
    return -1
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
509 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
def spectrum_peptide_count(spectrum_id):
    """Return the first column of the first SPECTRUM_PEPTIDES_QUERY row for spectrum_id.

    The query is run on mz_cursor with ``sr_id`` bound to spectrum_id, and
    the elapsed time is recorded under 'SPECTRUM_PEPTIDES_QUERY'.
    """
    began = time()
    first_row = mz_cursor.execute(SPECTRUM_PEPTIDES_QUERY, {"sr_id" : spectrum_id}).fetchone()
    pep_count = first_row[0]
    add_time('SPECTRUM_PEPTIDES_QUERY',time() - began)
    return pep_count
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
517 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
def get_exon_pileup(chrom,chromStart,chromEnd):
    """Summarise the read pileup of ``samfile`` over a genomic interval.

    Iterates ``samfile.pileup(chrom, chromStart, chromEnd)`` and keeps only
    the columns whose ``reference_pos`` lies in ``chromStart..chromEnd``
    (both bounds inclusive).  Returns a list with one dict per kept column:

      'depth' - number of reads at the column that are neither a deletion
                nor a reference skip
      'cov'   - the column's ``nsegments`` value
      'pos'   - the column's ``reference_pos``
      'bases' - dict mapping each observed read base to its count

    NOTE(review): ``samfile`` is presumably a pysam AlignmentFile opened
    elsewhere in this file -- confirm.
    """
    columns = []
    for column in samfile.pileup(chrom, chromStart, chromEnd):
        position = column.reference_pos
        # The pileup iterator may yield columns outside the requested
        # window; those are dropped here.
        if not chromStart <= position <= chromEnd:
            continue
        coverage = column.nsegments
        base_counts = {}
        depth = 0
        for read in column.pileups:
            # Deletions and reference skips carry no base at this position.
            if read.is_del or read.is_refskip:
                continue
            depth += 1
            base = read.alignment.query_sequence[read.query_position]
            base_counts[base] = base_counts.get(base, 0) + 1
        columns.append({'depth': depth, 'cov': coverage,
                        'pos': position, 'bases': base_counts})
    return columns
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
534 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
# Codon -> one-letter amino acid ('*' marks a stop codon).
codon_map = {
    "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
    "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
    "TAT": "Y", "TAC": "Y", "TAA": "*", "TAG": "*",
    "TGT": "C", "TGC": "C", "TGA": "*", "TGG": "W",
    "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
    "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
    "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
    "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
    "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
    "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
    "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
    "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
    "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
    "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
    "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
    "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G",
}

# Amino acid -> list of every codon that encodes it, in codon_map
# iteration order.
aa_codon_map = {}
for codon, amino in codon_map.items():
    aa_codon_map.setdefault(amino, []).append(codon)

# Amino acid -> nested lookup by codon position:
#   aa_na_map[aa][base0][base1] is the set of valid third bases.
aa_na_map = {}
for codon, amino in codon_map.items():
    first, second, third = codon
    by_first = aa_na_map.setdefault(amino, {})
    by_second = by_first.setdefault(first, {})
    by_second.setdefault(second, set()).add(third)
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
569 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
def get_pep_codon(pileup, idx, aa, ao):
    """Pick a codon for amino acid ``aa`` that is consistent with the pileup.

    A list of candidate bases is built for each of the three codon
    positions:

      * positions ``i < ao`` take every base that occurs at position ``i``
        in any codon of ``aa`` (from ``aa_codon_map``);
      * the remaining positions take the bases observed in
        ``pileup[idx + i]['bases']``, most frequent first (ties broken by
        base letter, descending).

    The first combination (first base outermost) that spells a codon of
    ``aa`` according to ``aa_na_map`` is returned.

    pileup -- sequence of column dicts, each with a 'bases' dict of
              base -> count (the shape built by get_exon_pileup)
    idx    -- index in ``pileup`` of the codon's first base
    aa     -- amino acid key into ``aa_codon_map`` / ``aa_na_map``
    ao     -- number of leading codon positions NOT taken from the pileup
              NOTE(review): presumably the offset of a codon that starts
              before the covered region -- confirm against the caller.

    Returns the codon as a three-character string, or None when no
    candidate combination encodes ``aa``.  Any exception is reported on
    stderr together with the arguments and then re-raised.
    """
    ts = time()
    try:
        bases = []
        for i in range(3):
            if i < ao:
                bases.append(list(set([c[i] for c in aa_codon_map[aa]])))
            else:
                observed = pileup[idx + i]['bases']
                # Index-based key instead of a tuple-unpacking lambda, and
                # items() instead of iteritems(), so this parses and runs
                # on both Python 2 and Python 3.  (count, base) keys are
                # unique, so reverse=True equals reversed(sorted(...)).
                ranked = sorted(observed.items(),
                                key=lambda kv: (kv[1], kv[0]),
                                reverse=True)
                bases.append([b for b, cnt in ranked])
        if args.debug:
            print('%s' % bases,file=sys.stderr)
        # Lazily enumerate valid codons in the same order as three nested
        # loops would; next() stops at the first hit.
        matches = ('%s%s%s' % (b0, b1, b2)
                   for b0 in bases[0] if b0 in aa_na_map[aa]
                   for b1 in bases[1] if b1 in aa_na_map[aa][b0]
                   for b2 in bases[2] if b2 in aa_na_map[aa][b0][b1])
        codon = next(matches, None)
        # Record the timing for hits as well as misses (previously the
        # early return skipped this on every successful lookup).
        add_time('pep_codon', time() - ts)
        return codon
    except Exception:
        print("get_pep_codon: %s %s %s %s"
              % (aa, ao, idx, pileup), file=sys.stderr)
        # Bare raise keeps the original traceback.
        raise
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
597 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
def write_probed(chrom,chromStart,chromEnd,strand,blockCount,blockSizes,blockStarts,
                 spectrum,protacc,peptide,uniqueness,genomeReference,score=1000,
                 psmScore='.', fdr='.', mods='.', charge='.',
                 expMassToCharge='.', calcMassToCharge='.',
                 psmRank='.', datasetID='.', uri='.'):
    """Write one tab-separated, 25-column record to the ``probed`` handle.

    Column order: chrom, chromStart, chromEnd, spectrum, score, strand,
    chromStart (again), chromEnd (again), the constant '0', blockCount,
    comma-joined blockSizes, comma-joined blockStarts, protacc, peptide,
    uniqueness, genomeReference, psmScore, fdr, mods, charge,
    expMassToCharge, calcMassToCharge, psmRank, datasetID, uri.

    chromStart, chromEnd, score and blockCount are formatted with ``%d``
    and must therefore be numeric; every other field is formatted with
    ``%s``.  Optional fields default to '.'.

    NOTE(review): the first twelve columns look like the BED12 layout
    (name, thickStart/thickEnd, itemRgb) -- confirm against the proBed
    specification.
    """
    sizes_field = ','.join(map(str, blockSizes))
    starts_field = ','.join(map(str, blockStarts))
    record = (
        chrom, chromStart, chromEnd, spectrum, score, strand,
        chromStart, chromEnd, '0', blockCount,
        sizes_field, starts_field,
        protacc, peptide, uniqueness, genomeReference,
        psmScore, fdr, mods, charge,
        expMassToCharge, calcMassToCharge,
        psmRank, datasetID, uri,
    )
    probed.write('%s\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%s\t%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % record)
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
609 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
def get_genomic_location(exons):
    """Collapse a list of exon records into one blocked genomic interval.

    Each exon is an indexable record read as: [1] chromosome, [2] and [3]
    the two end coordinates (in either order), [4] strand.  Chromosome and
    strand are taken from the first exon only.

    Returns the tuple
    (chrom, chromStart, chromEnd, strand, blockCount, blockSizes, blockStarts)
    where chromStart/chromEnd are the smallest/largest coordinate over all
    exons, blockSizes holds each exon's absolute length, and blockStarts
    holds each exon's lower coordinate relative to chromStart.  Blocks are
    kept in the order of ``exons``.

    Raises IndexError when ``exons`` is empty.
    """
    lead = exons[0]
    chrom, strand = lead[1], lead[4]
    starts = [exon[2] for exon in exons]
    ends = [exon[3] for exon in exons]
    chromStart = min(min(starts), min(ends))
    chromEnd = max(max(starts), max(ends))
    blockSizes = [abs(end - start) for start, end in zip(starts, ends)]
    blockStarts = [min(start, end) - chromStart
                   for start, end in zip(starts, ends)]
    return (chrom, chromStart, chromEnd, strand,
            len(exons), blockSizes, blockStarts)
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
620 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
def get_psm_modifications(peptide_ref):
    """Return the modifications of a peptide as a ';'-joined string.

    Runs PEP_MODS_QUERY on ``mz_cursor`` with ``peptide_ref`` bound as the
    named parameter ``peptide_ref``.  Each result row is unpacked as
    (location, residue, name, modType, unimod) and rendered as
    '<location>-<unimod>' when ``unimod`` is truthy, otherwise as
    '<location>-<name><residue>'.  Returns '' when the query yields no
    rows.  The elapsed time is reported to ``add_time`` under the label
    'PEP_MODS_QUERY'.
    """
    started = time()
    # Materialise the rows before formatting them.
    rows = list(mz_cursor.execute(PEP_MODS_QUERY,
                                  {"peptide_ref": peptide_ref}))
    entries = []
    for (location, residue, name, modType, unimod) in rows:
        label = unimod if unimod else '%s%s' % (name, residue)
        entries.append('%s-%s' % (location, label))
    add_time('PEP_MODS_QUERY', time() - started)
    return ';'.join(entries)
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
632 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
633 ## iterate through PSMs |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
634 psm_cursor = get_connection(args.mzsqlite).cursor() |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
635 ts = time() |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
636 psms = psm_cursor.execute(PSM_QUERY) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
637 te = time() |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
638 add_time('PSM_QUERY',te - ts) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
639 proBAM = ProBAM(species=None,assembly=args.genomeReference,seqlens=seqlens,comments=[]) if args.prosam or args.probam else None |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
640 proBED = ProBED(species=None,assembly=args.genomeReference,comments=[]) if args.probed else None |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
641 for i, psm in enumerate(psms): |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
642 probam_dict = PROBAM_DEFAULTS.copy() |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
643 (acc,pep_start,pep_end,aa_pre,aa_post,peptide,spectrum_id,spectrum_title,rank,charge,calcmass,exprmass,pepref) = psm |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
644 scan_name = spectrum_title if spectrum_title else spectrum_id |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
645 if args.debug: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
646 print('\nPSM: %d\t%s' % (i, '\t'.join([str(v) for v in (acc,pep_start,pep_end,peptide,spectrum_id,scan_name,rank,charge,calcmass,exprmass)])), file=sys.stderr) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
647 exons = get_mapping(acc,pep_start,pep_end) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
648 if args.debug: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
649 print('%s' % exons, file=sys.stderr) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
650 if not exons: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
651 continue |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
652 mods = get_psm_modifications(pepref) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
653 (chrom,chromStart,chromEnd,strand,blockCount,blockSizes,blockStarts) = get_genomic_location(exons) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
654 ref_cds = get_cds(exons) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
655 if args.debug: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
656 print('%s' % ref_cds, file=sys.stderr) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
657 ref_prot = translate(ref_cds) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
658 if args.debug: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
659 print('%s' % ref_prot, file=sys.stderr) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
660 print('%s' % peptide, file=sys.stderr) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
661 spectrum_peptides = spectrum_peptide_count(spectrum_id) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
662 peptide_locations = genomic_mapping_count(peptide) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
663 if args.debug: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
664 print('spectrum_peptide_count: %d\tpeptide_location_count: %d' % (spectrum_peptides,peptide_locations), file=sys.stderr) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
665 uniqueness = 'unique' if peptide_locations == 1 else 'not-unique[unknown]' |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
666 if proBED is not None: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
667 ts = time() |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
668 proBEDEntry = ProBEDEntry(chrom,chromStart,chromEnd, |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
669 '%s_%s' % (acc,scan_name), |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
670 1000,strand, |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
671 blockCount,blockSizes,blockStarts, |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
672 acc,peptide,uniqueness,args.genomeReference, |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
673 charge=charge,expMassToCharge=exprmass,calcMassToCharge=calcmass, |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
674 mods=mods if mods else '.', psmRank=rank) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
675 proBED.add_entry(proBEDEntry) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
676 te = time() |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
677 add_time('add_probed',te - ts) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
678 if proBAM is not None: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
679 if len(ref_prot) != len(peptide): |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
680 pass |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
681 # continue |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
682 ts = time() |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
683 probam_dict['NH'] = peptide_locations |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
684 probam_dict['XO'] = uniqueness |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
685 probam_dict['XL'] = peptide_locations |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
686 probam_dict['XP'] = peptide |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
687 probam_dict['YP'] = acc |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
688 probam_dict['XC'] = charge |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
689 probam_dict['XB'] = '%f;%f;%f' % (exprmass - calcmass, exprmass, calcmass) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
690 probam_dict['XR'] = ref_prot # ? dbSequence |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
691 probam_dict['YA'] = aa_post |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
692 probam_dict['YB'] = aa_pre |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
693 probam_dict['XM'] = mods if mods else '*' |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
694 flag = 16 if strand == '-' else 0 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
695 if str(rank)!=str(1) and rank!='*' and rank!=[] and rank!="": |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
696 flag += 256 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
697 probam_dict['XF'] = ','.join([str(e[2] % 3) for e in exons]) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
698 ## what if structural variant, need to split into multiple entries |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
699 ## probam_dict['XG'] = peptide_type |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
700 pep_cds = classify_peptide(exons,ref_prot,peptide,ref_cds,probam_dict) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
701 ## probam_dict['MD'] = peptide |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
702 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
703 ## FIX SAM sequence is forward strand |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
704 seq = pep_cds if strand == '+' else reverse_complement(pep_cds) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
705 ## cigar based on plus strand |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
706 cigar = '' |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
707 if strand == '+': |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
708 blkStarts = blockStarts |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
709 blkSizes = blockSizes |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
710 else: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
711 blkStarts = [x for x in reversed(blockStarts)] |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
712 blkSizes = [x for x in reversed(blockSizes)] |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
713 for j in range(blockCount): |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
714 if j > 0: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
715 intron = blkStarts[j] - (blkStarts[j-1] + blkSizes[j-1]) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
716 if intron > 0: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
717 cigar += '%dN' % intron |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
718 cigar += '%dM' % blkSizes[j] |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
719 proBAMEntry = ProBAMEntry(qname=scan_name, flag=flag, rname=chrom, pos=chromStart+1, |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
720 cigar=cigar,seq=seq,optional=probam_dict) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
721 proBAM.add_entry(proBAMEntry) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
722 te = time() |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
723 add_time('add_probam',te - ts) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
724 if args.debug: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
725 print('%s' % probam_dict, file=sys.stderr) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
726 if args.limit and i >= args.limit: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
727 break |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
728 if args.probed: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
729 ts = time() |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
730 with open(args.probed,'w') as fh: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
731 proBED.write(fh) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
732 te = time() |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
733 add_time('write_probed',te - ts) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
734 if args.prosam or args.probam: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
735 samfile = args.prosam if args.prosam else 'temp.sam' |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
736 ts = time() |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
737 with open(samfile,'w') as fh: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
738 proBAM.write(fh) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
739 te = time() |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
740 add_time('write_prosam',te - ts) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
741 if args.probam: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
742 ts = time() |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
743 bamfile = args.prosam.replace('.sam','.bam') |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
744 pysam.view(samfile, '-b', '-o', args.probam, catch_stdout=False) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
745 te = time() |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
746 add_time('write_probam',te - ts) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
747 pysam.index(args.probam) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
748 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
749 if args.verbose: |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
750 print('\n%s\n' % str(timings), file=sys.stderr) |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
751 |
f2dc9805107a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit 464e05be1084ed9a65b542c8eabb18147d425666
galaxyp
parents:
diff
changeset
|
# Script entry point: invoke __main__() only when this file is executed
# directly; importing it as a module does not start the run.
if __name__ == "__main__":
    __main__()