annotate cummerbund_to_tabular.py @ 0:21d03c46f286 draft default tip

Uploaded
author devteam
date Thu, 02 Apr 2015 16:13:37 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
21d03c46f286 Uploaded
devteam
parents:
diff changeset
1 import os
21d03c46f286 Uploaded
devteam
parents:
diff changeset
2 import argparse
21d03c46f286 Uploaded
devteam
parents:
diff changeset
3 import sys
21d03c46f286 Uploaded
devteam
parents:
diff changeset
4 import string
21d03c46f286 Uploaded
devteam
parents:
diff changeset
5 import sqlite3
21d03c46f286 Uploaded
devteam
parents:
diff changeset
6
21d03c46f286 Uploaded
devteam
parents:
diff changeset
7 import logging
21d03c46f286 Uploaded
devteam
parents:
diff changeset
8
21d03c46f286 Uploaded
devteam
parents:
diff changeset
9
21d03c46f286 Uploaded
devteam
parents:
diff changeset
class CummerbundParser(object):
    """Export tables of a cummeRbund SQLite database as tab-separated files.

    ``generate_file( name )`` opens ``<name>.tabular`` in the current working
    directory and runs the export method called ``name``. Every export method
    writes a header row followed by one row per database record through
    ``__write_line``.
    """

    # Export names accepted by generate_file(); each is a method of this class.
    TABLES = (
        'splicing_diff', 'promoters_diff', 'cds_diff',
        'tss_fpkm', 'isoform_fpkm', 'genes_fpkm', 'cds_fpkm',
        'tss_count_tracking', 'isoform_count', 'genes_count', 'cds_count',
        'tss_group_exp', 'isoform_exp_diff', 'gene_exp_diff', 'cds_exp_diff',
        'tss_rg', 'isoform_rg', 'gene_rg', 'cds_rg',
        'run_info', 'read_groups', 'var_model',
    )

    # Fixed leading columns of every *_fpkm export.
    _FPKM_HEADER = ( 'tracking_id', 'class_code', 'nearest_ref_id', 'gene_id', 'gene_short_name',
                     'tss_id', 'locus', 'length', 'coverage' )

    # Header of the expression-difference exports.
    _EXP_DIFF_HEADER = ( 'test_id', 'gene_id', 'gene', 'locus', 'sample_1', 'sample_2',
                         'status', 'value_1', 'value_2', 'log2(fold_change)', 'test_stat',
                         'p_value', 'q_value', 'significant' )

    def __init__( self, opts ):
        """Connect to the SQLite file named by ``opts.filename``.

        ``opts`` only needs a ``filename`` attribute (e.g. an argparse
        namespace). The connection is kept in ``self.session``.
        """
        self.cummerbund_db = opts.filename
        self.session = sqlite3.connect( os.path.abspath( self.cummerbund_db ) )
        # Output handle; assigned by generate_file() for the duration of an export.
        self.fh = None

    def generate_file( self, table ):
        """Write the export called ``table`` to ``<table>.tabular``.

        Names that are not listed in ``TABLES`` are reported on stdout and no
        file is created. (Dispatching on a whitelist rather than ``hasattr``
        keeps attributes such as ``session`` from being treated as exports.)
        """
        if table not in self.TABLES:
            # Single-argument print() behaves the same on Python 2 and 3.
            print( 'Table %s is not supported or does not exist.' % table )
            return
        with open( '%s.tabular' % table, 'w' ) as handle:
            self.fh = handle
            getattr( self, table )()

    def __write_line( self, line ):
        """Write one tab-separated row to ``self.fh``.

        ``None`` becomes ``-``; floats with an integral value are written
        without a fractional part; ``inf``, ``-inf`` and ``nan`` are written
        as-is; everything else goes through ``str()``.
        """
        columns = []
        for col in line:
            if col is None:
                columns.append( '-' )
            elif ( isinstance( col, float )
                   and str( col ) not in ( 'inf', '-inf', 'nan' )  # int() raises on these
                   and col == int( col ) ):
                columns.append( str( int( col ) ) )
            else:
                columns.append( str( col ) )
        self.fh.write( '\t'.join( columns ) + '\n' )

    def __write_table( self, header, rows ):
        """Write ``header`` followed by every row of the iterable ``rows``."""
        self.__write_line( header )
        for row in rows:
            self.__write_line( row )

    def __get_diff_from_table( self, table, identifier ):
        """Export a *DiffData table joined to ``genes`` on ``gene_id``.

        ``table`` and ``identifier`` are table/column names supplied by the
        methods of this class; they are substituted into the SQL text because
        identifiers cannot be bound as parameters.
        """
        header = [ 'test_id', 'gene_id', 'gene', 'locus', 'sample_1',
                   'sample_2', 'status', 'value_1', 'value_2', 'sqrt(JS)',
                   'test_stat', 'p_value', 'q_value', 'significant' ]
        columns = [ '${table}.${identifier}', '${table}.gene_id', 'genes.gene_short_name', 'genes.locus',
                    '${table}.sample_1', '${table}.sample_2', '${table}.status',
                    '${table}.value_1', '${table}.value_2', '${table}.JS_dist',
                    '${table}.test_stat', '${table}.p_value', '${table}.q_value',
                    '${table}.significant' ]
        query = string.Template( 'SELECT %s FROM ${table} JOIN genes on ${table}.gene_id = genes.gene_id' % ', '.join( columns ) )
        result = self.session.execute( query.safe_substitute( table=table, identifier=identifier ) )
        self.__write_table( header, result )

    def __get_read_group_data( self, table, identifier ):
        """Export a *ReplicateData table; ``identifier`` is its id column."""
        header = [ 'tracking_id', 'condition', 'replicate', 'raw_frags',
                   'internal_scaled_frags', 'external_scaled_frags', 'FPKM',
                   'effective_length', 'status' ]
        columns = [ identifier, 'sample_name', 'replicate', 'raw_frags',
                    'internal_scaled_frags', 'external_scaled_frags', 'fpkm',
                    'effective_length', 'status' ]
        result = self.session.execute( 'SELECT %s FROM %s' % ( ', '.join( columns ), table ) )
        self.__write_table( header, result )

    def __get_exp_diff( self, table, data_table, data_table_as, column ):
        """Export ``data_table`` (aliased ``data_table_as``) joined to ``table`` on ``column``."""
        columns = [ '${dtas}.${column}', '${table}.gene_id', '${table}.gene_short_name', '${table}.locus',
                    '${dtas}.sample_1', '${dtas}.sample_2', '${dtas}.status',
                    '${dtas}.value_1', '${dtas}.value_2', '${dtas}.log2_fold_change',
                    '${dtas}.test_stat', '${dtas}.p_value', '${dtas}.q_value',
                    '${dtas}.significant' ]
        query = string.Template( 'SELECT %s FROM ${dtab} as ${dtas} JOIN ${table} on ${dtas}.${column} = ${table}.${column}' % ', '.join( columns ) )
        result = self.session.execute( query.safe_substitute( dtas=data_table_as, dtab=data_table, table=table, column=column ) )
        self.__write_table( list( self._EXP_DIFF_HEADER ), result )

    def __per_sample_columns( self, identifiers, table, column, suffixes ):
        """Build ``<sample_name><suffix>`` header names for each identifier row.

        For every row in ``identifiers`` the samples recorded in ``table`` for
        ``row[0]`` are listed in ascending ``sample_name`` order and each one
        contributes one header name per suffix.
        """
        query = 'SELECT sample_name FROM %s WHERE %s = ? ORDER BY sample_name ASC' % ( table, column )
        columns = []
        for identifier in identifiers:
            # The id is bound as a parameter so quotes in it cannot break the SQL.
            for sample in self.session.execute( query, ( identifier[0], ) ):
                columns.extend( '%s%s' % ( sample[0], suffix ) for suffix in suffixes )
        return columns

    def __get_per_sample_fpkm( self, identifiers, table, column ):
        """Return the per-sample header names of an FPKM export."""
        return self.__per_sample_columns( identifiers, table, column,
                                          ( '_FPKM', '_conf_lo', '_conf_hi', '_status' ) )

    def __write_fpkm_rows( self, rows, data_table, column ):
        """Write each row followed by its per-sample FPKM values.

        ``row[0]`` is looked up in ``data_table.column``; the values are
        emitted as fpkm, conf_lo, conf_hi, quant_status per sample (ascending
        ``sample_name``), the same order as the header names produced by
        ``__get_per_sample_fpkm``.
        """
        query = 'SELECT fpkm, conf_lo, conf_hi, quant_status FROM %s WHERE %s = ? ORDER BY sample_name ASC' % ( data_table, column )
        for row in rows:
            out_data = list( row )
            for sample in self.session.execute( query, ( row[0], ) ):
                out_data.extend( sample )
            self.__write_line( out_data )

    def __get_fpkms( self, table, data_table, column ):
        """Export the records of ``table`` with per-sample FPKMs from ``data_table``.

        The per-sample header names are derived from the first record only.
        """
        # NOTE(review): `column` is selected twice, so the 'tss_id' output
        # column repeats the tracking id; confirm this is intended for isoforms.
        tss_columns = ', '.join( [ column, 'class_code', 'nearest_ref_id', 'gene_id',
                                   'gene_short_name', column, 'locus', 'length', 'coverage' ] )
        first_row = self.session.execute( 'SELECT %s FROM %s LIMIT 1' % ( tss_columns, table ) )
        output_cols = list( self._FPKM_HEADER )
        output_cols.extend( self.__get_per_sample_fpkm( identifiers=first_row, column=column, table=data_table ) )
        self.__write_line( output_cols )
        tss_groups = self.session.execute( 'SELECT %s FROM %s' % ( tss_columns, table ) )
        self.__write_fpkm_rows( tss_groups, data_table, column )

    def __get_count_data( self, table, column ):
        """Export a *Count table: one row per distinct id, five values per sample."""
        first_row = self.session.execute( 'SELECT %s FROM %s LIMIT 1' % ( column, table ) )
        output_cols = [ 'tracking_id' ]
        output_cols.extend( self.__get_per_sample_count_cols( identifiers=first_row, table=table, column=column ) )
        self.__write_line( output_cols )
        self.__get_per_sample_count_data( table=table, column=column )

    def __get_per_sample_count_data( self, table, column ):
        """Write one row per distinct ``column`` value with its per-sample count data."""
        query = 'SELECT count, variance, uncertainty, dispersion, status FROM %s WHERE %s = ? ORDER BY sample_name ASC' % ( table, column )
        for row in self.session.execute( 'SELECT DISTINCT(%s) FROM %s' % ( column, table ) ):
            output_data = [ row[0] ]
            for samplerow in self.session.execute( query, ( row[0], ) ):
                output_data.extend( samplerow )
            self.__write_line( output_data )

    def __get_per_sample_count_cols( self, identifiers, table, column ):
        """Return the per-sample header names of a count export."""
        return self.__per_sample_columns( identifiers, table, column,
                                          ( '_count', '_count_variance', '_count_uncertainty_var',
                                            '_count_dispersion_var', '_status' ) )

    def splicing_diff( self ):
        """Export ``splicingDiffData``."""
        self.__get_diff_from_table( 'splicingDiffData', 'TSS_group_id' )

    def promoters_diff( self ):
        """Export ``promoterDiffData``."""
        self.__get_diff_from_table( 'promoterDiffData', 'gene_id' )

    def cds_diff( self ):
        """Export ``CDSDiffData``."""
        self.__get_diff_from_table( 'CDSDiffData', 'gene_id' )

    def tss_fpkm( self ):
        """Export ``TSS`` records with per-sample values from ``TSSData``."""
        self.__get_fpkms( data_table='TSSData', table='TSS', column='TSS_group_id' )

    def isoform_fpkm( self ):
        """Export ``isoforms`` records with per-sample values from ``isoformData``."""
        self.__get_fpkms( data_table='isoformData', table='isoforms', column='isoform_id' )

    def genes_fpkm( self ):
        """Export ``genes`` (joined to ``TSS``) with per-sample values from ``geneData``."""
        first_row = self.session.execute( 'SELECT gene_id FROM genes LIMIT 1' )
        output_cols = list( self._FPKM_HEADER )
        output_cols.extend( self.__get_per_sample_fpkm( identifiers=first_row, column='gene_id', table='geneData' ) )
        self.__write_line( output_cols )
        data_columns = [ 'genes.gene_id', 'genes.class_code', 'genes.nearest_ref_id', 'genes.gene_id', 'genes.gene_short_name',
                         'GROUP_CONCAT(TSS.TSS_group_id)', 'genes.locus', 'genes.length', 'genes.coverage' ]
        query = 'SELECT %s FROM genes JOIN TSS on TSS.gene_id = genes.gene_id GROUP BY genes.gene_id' % ', '.join( data_columns )
        self.__write_fpkm_rows( self.session.execute( query ), 'geneData', 'gene_id' )

    def cds_fpkm( self ):
        """Export ``CDS`` records with per-sample values from ``CDSData``."""
        first_row = self.session.execute( 'SELECT CDS_id FROM CDS LIMIT 1' )
        output_cols = list( self._FPKM_HEADER )
        output_cols.extend( self.__get_per_sample_fpkm( identifiers=first_row, column='CDS_id', table='CDSData' ) )
        self.__write_line( output_cols )
        data_columns = [ 'CDS_id', 'class_code', 'nearest_ref_id', 'gene_id', 'gene_short_name',
                         'GROUP_CONCAT(TSS_group_id)', 'locus', 'length', 'coverage' ]
        query = 'SELECT %s FROM CDS GROUP BY CDS_id' % ', '.join( data_columns )
        self.__write_fpkm_rows( self.session.execute( query ), 'CDSData', 'CDS_id' )

    def tss_count_tracking( self ):
        """Export ``TSSCount``."""
        self.__get_count_data( table='TSSCount', column='TSS_group_id' )

    def isoform_count( self ):
        """Export ``isoformCount``."""
        self.__get_count_data( table='isoformCount', column='isoform_id' )

    def genes_count( self ):
        """Export ``geneCount``."""
        self.__get_count_data( table='geneCount', column='gene_id' )

    def cds_count( self ):
        """Export ``CDSCount``."""
        self.__get_count_data( table='CDSCount', column='CDS_id' )

    def tss_group_exp( self ):
        """Export ``TSSExpDiffData`` joined to ``TSS`` on ``TSS_group_id``."""
        columns = [ 'TEDD.TSS_group_id', 'TSS.gene_id', 'TSS.gene_short_name', 'TSS.locus',
                    'TEDD.sample_1', 'TEDD.sample_2', 'TEDD.status',
                    'TEDD.value_1', 'TEDD.value_2', 'TEDD.log2_fold_change',
                    'TEDD.test_stat', 'TEDD.p_value', 'TEDD.q_value', 'TEDD.significant' ]
        query = ' '.join( [ 'SELECT %s FROM TSSExpDiffData AS TEDD' % ', '.join( columns ),
                            'JOIN TSS on TEDD.TSS_group_id = TSS.TSS_group_id' ] )
        self.__write_table( list( self._EXP_DIFF_HEADER ), self.session.execute( query ) )

    def run_info( self ):
        """Export the ``runInfo`` parameter/value pairs."""
        self.__write_table( [ 'param', 'value' ],
                            self.session.execute( 'SELECT param, value FROM runInfo' ) )

    def read_groups( self ):
        """Export the ``replicates`` table."""
        header = [ 'file', 'condition', 'replicate_num', 'total_mass', 'norm_mass', 'internal_scale', 'external_scale' ]
        query = 'SELECT file, sample_name, replicate, total_mass, norm_mass, internal_scale, external_scale FROM replicates'
        self.__write_table( header, self.session.execute( query ) )

    def isoform_exp_diff( self ):
        """Export ``isoformExpDiffData`` joined to ``isoforms``."""
        self.__get_exp_diff( table='isoforms', data_table='isoformExpDiffData', data_table_as='iED', column='isoform_id' )

    def gene_exp_diff( self ):
        """Export ``geneExpDiffData`` joined to ``genes``."""
        self.__get_exp_diff( table='genes', data_table='geneExpDiffData', data_table_as='gEDD', column='gene_id' )

    def cds_exp_diff( self ):
        """Export ``CDSExpDiffData`` joined to ``CDS``."""
        self.__get_exp_diff( table='CDS', data_table='CDSExpDiffData', data_table_as='CED', column='CDS_id' )

    def tss_rg( self ):
        """Export ``TSSReplicateData``."""
        self.__get_read_group_data( table='TSSReplicateData', identifier='TSS_group_id' )

    def isoform_rg( self ):
        """Export ``isoformReplicateData``."""
        self.__get_read_group_data( table='isoformReplicateData', identifier='isoform_id' )

    def gene_rg( self ):
        """Export ``geneReplicateData``."""
        self.__get_read_group_data( table='geneReplicateData', identifier='gene_id' )

    def cds_rg( self ):
        """Export ``CDSReplicateData``."""
        self.__get_read_group_data( table='CDSReplicateData', identifier='CDS_id' )

    def var_model( self ):
        """Export ``varModel``; the header names double as the selected columns."""
        header = [ 'condition', 'locus', 'compatible_count_mean', 'compatible_count_var', 'total_count_mean', 'total_count_var', 'fitted_var' ]
        self.__write_table( header, self.session.execute( 'SELECT %s FROM varModel' % ', '.join( header ) ) )
21d03c46f286 Uploaded
devteam
parents:
diff changeset
256
21d03c46f286 Uploaded
devteam
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: export every table named with --tables from
    # the SQLite database given with --file.
    parser = argparse.ArgumentParser()
    # Required: without a filename the parser class cannot open a database.
    parser.add_argument( '--file', dest='filename', required=True )
    # May be repeated; each occurrence names one table to export.
    parser.add_argument( '--tables', dest='tables', action='append' )
    opts = parser.parse_args()
    cb = CummerbundParser( opts )
    try:
        # opts.tables is None when --tables was never given.
        for table in opts.tables or []:
            cb.generate_file( table )
    finally:
        cb.session.close()