annotate cummerbund-e132e60f95b1/cuffdiff_wrapper.py @ 0:944bdd58233b

Uploaded
author molson
date Sat, 20 Apr 2013 21:13:00 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
944bdd58233b Uploaded
molson
parents:
diff changeset
1 #!/usr/bin/env python
944bdd58233b Uploaded
molson
parents:
diff changeset
2
944bdd58233b Uploaded
molson
parents:
diff changeset
3 # Wrapper supports Cuffdiff versions v1.3.0-v2.0
944bdd58233b Uploaded
molson
parents:
diff changeset
4
944bdd58233b Uploaded
molson
parents:
diff changeset
5 import optparse, os, shutil, subprocess, sys, tempfile
944bdd58233b Uploaded
molson
parents:
diff changeset
6
944bdd58233b Uploaded
molson
parents:
diff changeset
7 def group_callback( option, op_str, value, parser ):
944bdd58233b Uploaded
molson
parents:
diff changeset
8 groups = []
944bdd58233b Uploaded
molson
parents:
diff changeset
9 flist = []
944bdd58233b Uploaded
molson
parents:
diff changeset
10 for arg in parser.rargs:
944bdd58233b Uploaded
molson
parents:
diff changeset
11 arg = arg.strip()
944bdd58233b Uploaded
molson
parents:
diff changeset
12 if arg[0] is "-":
944bdd58233b Uploaded
molson
parents:
diff changeset
13 break
944bdd58233b Uploaded
molson
parents:
diff changeset
14 elif arg[0] is ",":
944bdd58233b Uploaded
molson
parents:
diff changeset
15 groups.append(flist)
944bdd58233b Uploaded
molson
parents:
diff changeset
16 flist = []
944bdd58233b Uploaded
molson
parents:
diff changeset
17 else:
944bdd58233b Uploaded
molson
parents:
diff changeset
18 flist.append(arg)
944bdd58233b Uploaded
molson
parents:
diff changeset
19 groups.append(flist)
944bdd58233b Uploaded
molson
parents:
diff changeset
20
944bdd58233b Uploaded
molson
parents:
diff changeset
21 setattr(parser.values, option.dest, groups)
944bdd58233b Uploaded
molson
parents:
diff changeset
22
944bdd58233b Uploaded
molson
parents:
diff changeset
23 def label_callback( option, op_str, value, parser ):
944bdd58233b Uploaded
molson
parents:
diff changeset
24 labels = []
944bdd58233b Uploaded
molson
parents:
diff changeset
25 for arg in parser.rargs:
944bdd58233b Uploaded
molson
parents:
diff changeset
26 arg = arg.strip()
944bdd58233b Uploaded
molson
parents:
diff changeset
27 if arg[0] is "-":
944bdd58233b Uploaded
molson
parents:
diff changeset
28 break
944bdd58233b Uploaded
molson
parents:
diff changeset
29 else:
944bdd58233b Uploaded
molson
parents:
diff changeset
30 labels.append(arg)
944bdd58233b Uploaded
molson
parents:
diff changeset
31
944bdd58233b Uploaded
molson
parents:
diff changeset
32 setattr(parser.values, option.dest, labels)
944bdd58233b Uploaded
molson
parents:
diff changeset
33
944bdd58233b Uploaded
molson
parents:
diff changeset
34 def stop_err( msg ):
944bdd58233b Uploaded
molson
parents:
diff changeset
35 sys.stderr.write( "%s\n" % msg )
944bdd58233b Uploaded
molson
parents:
diff changeset
36 sys.exit()
944bdd58233b Uploaded
molson
parents:
diff changeset
37
944bdd58233b Uploaded
molson
parents:
diff changeset
38 # Copied from sam_to_bam.py:
944bdd58233b Uploaded
molson
parents:
diff changeset
39 def check_seq_file( dbkey, cached_seqs_pointer_file ):
944bdd58233b Uploaded
molson
parents:
diff changeset
40 seq_path = ''
944bdd58233b Uploaded
molson
parents:
diff changeset
41 for line in open( cached_seqs_pointer_file ):
944bdd58233b Uploaded
molson
parents:
diff changeset
42 line = line.rstrip( '\r\n' )
944bdd58233b Uploaded
molson
parents:
diff changeset
43 if line and not line.startswith( '#' ) and line.startswith( 'index' ):
944bdd58233b Uploaded
molson
parents:
diff changeset
44 fields = line.split( '\t' )
944bdd58233b Uploaded
molson
parents:
diff changeset
45 if len( fields ) < 3:
944bdd58233b Uploaded
molson
parents:
diff changeset
46 continue
944bdd58233b Uploaded
molson
parents:
diff changeset
47 if fields[1] == dbkey:
944bdd58233b Uploaded
molson
parents:
diff changeset
48 seq_path = fields[2].strip()
944bdd58233b Uploaded
molson
parents:
diff changeset
49 break
944bdd58233b Uploaded
molson
parents:
diff changeset
50 return seq_path
944bdd58233b Uploaded
molson
parents:
diff changeset
51
944bdd58233b Uploaded
molson
parents:
diff changeset
52 def __main__():
944bdd58233b Uploaded
molson
parents:
diff changeset
53 #Parse Command Line
944bdd58233b Uploaded
molson
parents:
diff changeset
54 parser = optparse.OptionParser()
944bdd58233b Uploaded
molson
parents:
diff changeset
55
944bdd58233b Uploaded
molson
parents:
diff changeset
56 # Cuffdiff options.
944bdd58233b Uploaded
molson
parents:
diff changeset
57 parser.add_option( '-s', '--inner-dist-std-dev', dest='inner_dist_std_dev', help='The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.' )
944bdd58233b Uploaded
molson
parents:
diff changeset
58 parser.add_option( '-p', '--num-threads', dest='num_threads', help='Use this many threads to align reads. The default is 1.' )
944bdd58233b Uploaded
molson
parents:
diff changeset
59 parser.add_option( '-m', '--inner-mean-dist', dest='inner_mean_dist', help='This is the expected (mean) inner distance between mate pairs. \
944bdd58233b Uploaded
molson
parents:
diff changeset
60 For, example, for paired end runs with fragments selected at 300bp, \
944bdd58233b Uploaded
molson
parents:
diff changeset
61 where each end is 50bp, you should set -r to be 200. The default is 45bp.')
944bdd58233b Uploaded
molson
parents:
diff changeset
62 parser.add_option( '-c', '--min-alignment-count', dest='min_alignment_count', help='The minimum number of alignments in a locus for needed to conduct significance testing on changes in that locus observed between samples. If no testing is performed, changes in the locus are deemed not signficant, and the locus\' observed changes don\'t contribute to correction for multiple testing. The default is 1,000 fragment alignments (up to 2,000 paired reads).' )
944bdd58233b Uploaded
molson
parents:
diff changeset
63 parser.add_option( '--FDR', dest='FDR', help='The allowed false discovery rate. The default is 0.05.' )
944bdd58233b Uploaded
molson
parents:
diff changeset
64 parser.add_option( '-u', '--multi-read-correct', dest='multi_read_correct', action="store_true", help='Tells Cufflinks to do an initial estimation procedure to more accurately weight reads mapping to multiple locations in the genome')
944bdd58233b Uploaded
molson
parents:
diff changeset
65
944bdd58233b Uploaded
molson
parents:
diff changeset
66 # Advanced Options:
944bdd58233b Uploaded
molson
parents:
diff changeset
67 parser.add_option( '--num-importance-samples', dest='num_importance_samples', help='Sets the number of importance samples generated for each locus during abundance estimation. Default: 1000' )
944bdd58233b Uploaded
molson
parents:
diff changeset
68 parser.add_option( '--max-mle-iterations', dest='max_mle_iterations', help='Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000' )
944bdd58233b Uploaded
molson
parents:
diff changeset
69
944bdd58233b Uploaded
molson
parents:
diff changeset
70 # Wrapper / Galaxy options.
944bdd58233b Uploaded
molson
parents:
diff changeset
71 parser.add_option( '-f', '--files', dest='groups', action="callback", callback=group_callback, help="Groups to be processed, groups are separated by spaces, replicates in a group comma separated. group1_rep1,group1_rep2 group2_rep1,group2_rep2, ..., groupN_rep1, groupN_rep2" )
944bdd58233b Uploaded
molson
parents:
diff changeset
72 parser.add_option( '-A', '--inputA', dest='inputA', help='A transcript GTF file produced by cufflinks, cuffcompare, or other source.')
944bdd58233b Uploaded
molson
parents:
diff changeset
73 parser.add_option( '-1', '--input1', dest='input1', help='File of RNA-Seq read alignments in the SAM format. SAM is a standard short read alignment, that allows aligners to attach custom tags to individual alignments, and Cufflinks requires that the alignments you supply have some of these tags. Please see Input formats for more details.' )
944bdd58233b Uploaded
molson
parents:
diff changeset
74 parser.add_option( '-2', '--input2', dest='input2', help='File of RNA-Seq read alignments in the SAM format. SAM is a standard short read alignment, that allows aligners to attach custom tags to individual alignments, and Cufflinks requires that the alignments you supply have some of these tags. Please see Input formats for more details.' )
944bdd58233b Uploaded
molson
parents:
diff changeset
75
944bdd58233b Uploaded
molson
parents:
diff changeset
76 # Label options
944bdd58233b Uploaded
molson
parents:
diff changeset
77 parser.add_option('-L', '--labels', dest='labels', action="callback", callback=label_callback, help="Labels for the groups the replicates are in.")
944bdd58233b Uploaded
molson
parents:
diff changeset
78
944bdd58233b Uploaded
molson
parents:
diff changeset
79 # Normalization options.
944bdd58233b Uploaded
molson
parents:
diff changeset
80 parser.add_option( "-N", "--quartile-normalization", dest="do_normalization", action="store_true" )
944bdd58233b Uploaded
molson
parents:
diff changeset
81
944bdd58233b Uploaded
molson
parents:
diff changeset
82 # Bias correction options.
944bdd58233b Uploaded
molson
parents:
diff changeset
83 parser.add_option( '-b', dest='do_bias_correction', action="store_true", help='Providing Cufflinks with a multifasta file via this option instructs it to run our new bias detection and correction algorithm which can significantly improve accuracy of transcript abundance estimates.')
944bdd58233b Uploaded
molson
parents:
diff changeset
84 parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' )
944bdd58233b Uploaded
molson
parents:
diff changeset
85 parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' )
944bdd58233b Uploaded
molson
parents:
diff changeset
86 parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' )
944bdd58233b Uploaded
molson
parents:
diff changeset
87
944bdd58233b Uploaded
molson
parents:
diff changeset
88 # Outputs.
944bdd58233b Uploaded
molson
parents:
diff changeset
89 parser.add_option( "--isoforms_fpkm_tracking_output", dest="isoforms_fpkm_tracking_output" )
944bdd58233b Uploaded
molson
parents:
diff changeset
90 parser.add_option( "--genes_fpkm_tracking_output", dest="genes_fpkm_tracking_output" )
944bdd58233b Uploaded
molson
parents:
diff changeset
91 parser.add_option( "--cds_fpkm_tracking_output", dest="cds_fpkm_tracking_output" )
944bdd58233b Uploaded
molson
parents:
diff changeset
92 parser.add_option( "--tss_groups_fpkm_tracking_output", dest="tss_groups_fpkm_tracking_output" )
944bdd58233b Uploaded
molson
parents:
diff changeset
93 parser.add_option( "--isoforms_read_group_tracking_output", dest="isoforms_read_group_tracking_output", default=None)
944bdd58233b Uploaded
molson
parents:
diff changeset
94 parser.add_option( "--genes_read_group_tracking_output", dest="genes_read_group_tracking_output", default=None )
944bdd58233b Uploaded
molson
parents:
diff changeset
95 parser.add_option( "--cds_read_group_tracking_output", dest="cds_read_group_tracking_output", default=None )
944bdd58233b Uploaded
molson
parents:
diff changeset
96 parser.add_option( "--tss_groups_read_group_tracking_output", dest="tss_groups_read_group_tracking_output", default=None )
944bdd58233b Uploaded
molson
parents:
diff changeset
97 parser.add_option( "--isoforms_exp_output", dest="isoforms_exp_output" )
944bdd58233b Uploaded
molson
parents:
diff changeset
98 parser.add_option( "--genes_exp_output", dest="genes_exp_output" )
944bdd58233b Uploaded
molson
parents:
diff changeset
99 parser.add_option( "--tss_groups_exp_output", dest="tss_groups_exp_output" )
944bdd58233b Uploaded
molson
parents:
diff changeset
100 parser.add_option( "--cds_exp_fpkm_tracking_output", dest="cds_exp_fpkm_tracking_output" )
944bdd58233b Uploaded
molson
parents:
diff changeset
101 parser.add_option( "--cds_diff_output", dest="cds_diff_output" )
944bdd58233b Uploaded
molson
parents:
diff changeset
102 parser.add_option( "--isoforms_count_tracking_output", dest="isoforms_count_tracking_output" )
944bdd58233b Uploaded
molson
parents:
diff changeset
103 parser.add_option( "--genes_count_tracking_output", dest="genes_count_tracking_output" )
944bdd58233b Uploaded
molson
parents:
diff changeset
104 parser.add_option( "--cds_count_tracking_output", dest="cds_count_tracking_output" )
944bdd58233b Uploaded
molson
parents:
diff changeset
105 parser.add_option( "--tss_groups_count_tracking_output", dest="tss_groups_count_tracking_output" )
944bdd58233b Uploaded
molson
parents:
diff changeset
106 parser.add_option( "--splicing_diff_output", dest="splicing_diff_output" )
944bdd58233b Uploaded
molson
parents:
diff changeset
107 parser.add_option( "--promoters_diff_output", dest="promoters_diff_output" )
944bdd58233b Uploaded
molson
parents:
diff changeset
108 parser.add_option( "--run_info_output", dest="run_info_output" )
944bdd58233b Uploaded
molson
parents:
diff changeset
109 parser.add_option( "--read_groups_info_output", dest="read_groups_info_output" )
944bdd58233b Uploaded
molson
parents:
diff changeset
110 parser.add_option( "--cuffdatadir", dest="cuffdatadir", default=None)
944bdd58233b Uploaded
molson
parents:
diff changeset
111 parser.add_option( "--cummeRbund_db_output", dest="cummeRbund_db_output", default=None)
944bdd58233b Uploaded
molson
parents:
diff changeset
112
944bdd58233b Uploaded
molson
parents:
diff changeset
113 (options, args) = parser.parse_args()
944bdd58233b Uploaded
molson
parents:
diff changeset
114
944bdd58233b Uploaded
molson
parents:
diff changeset
115 # output version # of tool
944bdd58233b Uploaded
molson
parents:
diff changeset
116 try:
944bdd58233b Uploaded
molson
parents:
diff changeset
117 tmp = tempfile.NamedTemporaryFile().name
944bdd58233b Uploaded
molson
parents:
diff changeset
118 tmp_stdout = open( tmp, 'wb' )
944bdd58233b Uploaded
molson
parents:
diff changeset
119 proc = subprocess.Popen( args='cuffdiff --no-update-check 2>&1', shell=True, stdout=tmp_stdout )
944bdd58233b Uploaded
molson
parents:
diff changeset
120 tmp_stdout.close()
944bdd58233b Uploaded
molson
parents:
diff changeset
121 returncode = proc.wait()
944bdd58233b Uploaded
molson
parents:
diff changeset
122 stdout = None
944bdd58233b Uploaded
molson
parents:
diff changeset
123 for line in open( tmp_stdout.name, 'rb' ):
944bdd58233b Uploaded
molson
parents:
diff changeset
124 if line.lower().find( 'cuffdiff v' ) >= 0:
944bdd58233b Uploaded
molson
parents:
diff changeset
125 stdout = line.strip()
944bdd58233b Uploaded
molson
parents:
diff changeset
126 break
944bdd58233b Uploaded
molson
parents:
diff changeset
127 if stdout:
944bdd58233b Uploaded
molson
parents:
diff changeset
128 sys.stdout.write( '%s\n' % stdout )
944bdd58233b Uploaded
molson
parents:
diff changeset
129 else:
944bdd58233b Uploaded
molson
parents:
diff changeset
130 raise Exception
944bdd58233b Uploaded
molson
parents:
diff changeset
131 except:
944bdd58233b Uploaded
molson
parents:
diff changeset
132 sys.stdout.write( 'Could not determine Cuffdiff version\n' )
944bdd58233b Uploaded
molson
parents:
diff changeset
133
944bdd58233b Uploaded
molson
parents:
diff changeset
134 # Make temp directory for output.
944bdd58233b Uploaded
molson
parents:
diff changeset
135 tmp_output_dir = tempfile.mkdtemp()
944bdd58233b Uploaded
molson
parents:
diff changeset
136 cuffdatadir = options.cuffdatadir if options.cuffdatadir else tmp_output_dir
944bdd58233b Uploaded
molson
parents:
diff changeset
137 if not os.path.exists( cuffdatadir ):
944bdd58233b Uploaded
molson
parents:
diff changeset
138 os.makedirs( cuffdatadir )
944bdd58233b Uploaded
molson
parents:
diff changeset
139
944bdd58233b Uploaded
molson
parents:
diff changeset
140
944bdd58233b Uploaded
molson
parents:
diff changeset
141 # If doing bias correction, set/link to sequence file.
944bdd58233b Uploaded
molson
parents:
diff changeset
142 if options.do_bias_correction:
944bdd58233b Uploaded
molson
parents:
diff changeset
143 if options.ref_file != 'None':
944bdd58233b Uploaded
molson
parents:
diff changeset
144 # Sequence data from history.
944bdd58233b Uploaded
molson
parents:
diff changeset
145 # Create symbolic link to ref_file so that index will be created in working directory.
944bdd58233b Uploaded
molson
parents:
diff changeset
146 seq_path = os.path.join( cuffdatadir, "ref.fa" )
944bdd58233b Uploaded
molson
parents:
diff changeset
147 os.symlink( options.ref_file, seq_path )
944bdd58233b Uploaded
molson
parents:
diff changeset
148 else:
944bdd58233b Uploaded
molson
parents:
diff changeset
149 # Sequence data from loc file.
944bdd58233b Uploaded
molson
parents:
diff changeset
150 cached_seqs_pointer_file = os.path.join( options.index_dir, 'sam_fa_indices.loc' )
944bdd58233b Uploaded
molson
parents:
diff changeset
151 if not os.path.exists( cached_seqs_pointer_file ):
944bdd58233b Uploaded
molson
parents:
diff changeset
152 stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file )
944bdd58233b Uploaded
molson
parents:
diff changeset
153 # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa,
944bdd58233b Uploaded
molson
parents:
diff changeset
154 # and the equCab2.fa file will contain fasta sequences.
944bdd58233b Uploaded
molson
parents:
diff changeset
155 seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file )
944bdd58233b Uploaded
molson
parents:
diff changeset
156 if seq_path == '':
944bdd58233b Uploaded
molson
parents:
diff changeset
157 stop_err( 'No sequence data found for dbkey %s, so bias correction cannot be used.' % options.dbkey )
944bdd58233b Uploaded
molson
parents:
diff changeset
158
944bdd58233b Uploaded
molson
parents:
diff changeset
159 # Build command.
944bdd58233b Uploaded
molson
parents:
diff changeset
160
944bdd58233b Uploaded
molson
parents:
diff changeset
161 # Base; always use quiet mode to avoid problems with storing log output.
944bdd58233b Uploaded
molson
parents:
diff changeset
162 cmd = "cuffdiff --no-update-check -q"
944bdd58233b Uploaded
molson
parents:
diff changeset
163
944bdd58233b Uploaded
molson
parents:
diff changeset
164 # Add options.
944bdd58233b Uploaded
molson
parents:
diff changeset
165 if options.inner_dist_std_dev:
944bdd58233b Uploaded
molson
parents:
diff changeset
166 cmd += ( " -s %i" % int ( options.inner_dist_std_dev ) )
944bdd58233b Uploaded
molson
parents:
diff changeset
167 if options.num_threads:
944bdd58233b Uploaded
molson
parents:
diff changeset
168 cmd += ( " -p %i" % int ( options.num_threads ) )
944bdd58233b Uploaded
molson
parents:
diff changeset
169 if options.inner_mean_dist:
944bdd58233b Uploaded
molson
parents:
diff changeset
170 cmd += ( " -m %i" % int ( options.inner_mean_dist ) )
944bdd58233b Uploaded
molson
parents:
diff changeset
171 if options.min_alignment_count:
944bdd58233b Uploaded
molson
parents:
diff changeset
172 cmd += ( " -c %i" % int ( options.min_alignment_count ) )
944bdd58233b Uploaded
molson
parents:
diff changeset
173 if options.FDR:
944bdd58233b Uploaded
molson
parents:
diff changeset
174 cmd += ( " --FDR %f" % float( options.FDR ) )
944bdd58233b Uploaded
molson
parents:
diff changeset
175 if options.multi_read_correct:
944bdd58233b Uploaded
molson
parents:
diff changeset
176 cmd += ( " -u" )
944bdd58233b Uploaded
molson
parents:
diff changeset
177 if options.num_importance_samples:
944bdd58233b Uploaded
molson
parents:
diff changeset
178 cmd += ( " --num-importance-samples %i" % int ( options.num_importance_samples ) )
944bdd58233b Uploaded
molson
parents:
diff changeset
179 if options.max_mle_iterations:
944bdd58233b Uploaded
molson
parents:
diff changeset
180 cmd += ( " --max-mle-iterations %i" % int ( options.max_mle_iterations ) )
944bdd58233b Uploaded
molson
parents:
diff changeset
181 if options.do_normalization:
944bdd58233b Uploaded
molson
parents:
diff changeset
182 cmd += ( " -N" )
944bdd58233b Uploaded
molson
parents:
diff changeset
183 if options.do_bias_correction:
944bdd58233b Uploaded
molson
parents:
diff changeset
184 cmd += ( " -b %s" % seq_path )
944bdd58233b Uploaded
molson
parents:
diff changeset
185
944bdd58233b Uploaded
molson
parents:
diff changeset
186 # Add inputs.
944bdd58233b Uploaded
molson
parents:
diff changeset
187 # For replicate analysis: group1_rep1,group1_rep2 groupN_rep1,groupN_rep2
944bdd58233b Uploaded
molson
parents:
diff changeset
188 if options.groups:
944bdd58233b Uploaded
molson
parents:
diff changeset
189 cmd += " --labels "
944bdd58233b Uploaded
molson
parents:
diff changeset
190 for label in options.labels:
944bdd58233b Uploaded
molson
parents:
diff changeset
191 cmd += label + ","
944bdd58233b Uploaded
molson
parents:
diff changeset
192 cmd = cmd[:-1]
944bdd58233b Uploaded
molson
parents:
diff changeset
193
944bdd58233b Uploaded
molson
parents:
diff changeset
194 cmd += " " + options.inputA + " "
944bdd58233b Uploaded
molson
parents:
diff changeset
195
944bdd58233b Uploaded
molson
parents:
diff changeset
196 for group in options.groups:
944bdd58233b Uploaded
molson
parents:
diff changeset
197 for filename in group:
944bdd58233b Uploaded
molson
parents:
diff changeset
198 cmd += filename + ","
944bdd58233b Uploaded
molson
parents:
diff changeset
199 cmd = cmd[:-1] + " "
944bdd58233b Uploaded
molson
parents:
diff changeset
200 else:
944bdd58233b Uploaded
molson
parents:
diff changeset
201 cmd += " " + options.inputA + " " + options.input1 + " " + options.input2
944bdd58233b Uploaded
molson
parents:
diff changeset
202
944bdd58233b Uploaded
molson
parents:
diff changeset
203 # Debugging.
944bdd58233b Uploaded
molson
parents:
diff changeset
204 print cmd
944bdd58233b Uploaded
molson
parents:
diff changeset
205
944bdd58233b Uploaded
molson
parents:
diff changeset
206 # Run command.
944bdd58233b Uploaded
molson
parents:
diff changeset
207 try:
944bdd58233b Uploaded
molson
parents:
diff changeset
208 tmp_name = tempfile.NamedTemporaryFile( dir=tmp_output_dir ).name
944bdd58233b Uploaded
molson
parents:
diff changeset
209 tmp_stderr = open( tmp_name, 'wb' )
944bdd58233b Uploaded
molson
parents:
diff changeset
210 proc = subprocess.Popen( args=cmd, shell=True, cwd=cuffdatadir, stderr=tmp_stderr.fileno() )
944bdd58233b Uploaded
molson
parents:
diff changeset
211 returncode = proc.wait()
944bdd58233b Uploaded
molson
parents:
diff changeset
212 tmp_stderr.close()
944bdd58233b Uploaded
molson
parents:
diff changeset
213
944bdd58233b Uploaded
molson
parents:
diff changeset
214 # Get stderr, allowing for case where it's very large.
944bdd58233b Uploaded
molson
parents:
diff changeset
215 tmp_stderr = open( tmp_name, 'rb' )
944bdd58233b Uploaded
molson
parents:
diff changeset
216 stderr = ''
944bdd58233b Uploaded
molson
parents:
diff changeset
217 buffsize = 1048576
944bdd58233b Uploaded
molson
parents:
diff changeset
218 try:
944bdd58233b Uploaded
molson
parents:
diff changeset
219 while True:
944bdd58233b Uploaded
molson
parents:
diff changeset
220 stderr += tmp_stderr.read( buffsize )
944bdd58233b Uploaded
molson
parents:
diff changeset
221 if not stderr or len( stderr ) % buffsize != 0:
944bdd58233b Uploaded
molson
parents:
diff changeset
222 break
944bdd58233b Uploaded
molson
parents:
diff changeset
223 except OverflowError:
944bdd58233b Uploaded
molson
parents:
diff changeset
224 pass
944bdd58233b Uploaded
molson
parents:
diff changeset
225 tmp_stderr.close()
944bdd58233b Uploaded
molson
parents:
diff changeset
226
944bdd58233b Uploaded
molson
parents:
diff changeset
227 # Error checking.
944bdd58233b Uploaded
molson
parents:
diff changeset
228 if returncode != 0:
944bdd58233b Uploaded
molson
parents:
diff changeset
229 raise Exception, stderr
944bdd58233b Uploaded
molson
parents:
diff changeset
230
944bdd58233b Uploaded
molson
parents:
diff changeset
231 # check that there are results in the output file
944bdd58233b Uploaded
molson
parents:
diff changeset
232 if len( open( os.path.join( cuffdatadir, "isoforms.fpkm_tracking" ), 'rb' ).read().strip() ) == 0:
944bdd58233b Uploaded
molson
parents:
diff changeset
233 raise Exception, 'The main output file is empty, there may be an error with your input file or settings.'
944bdd58233b Uploaded
molson
parents:
diff changeset
234 except Exception, e:
944bdd58233b Uploaded
molson
parents:
diff changeset
235 stop_err( 'Error running cuffdiff. ' + str( e ) )
944bdd58233b Uploaded
molson
parents:
diff changeset
236
944bdd58233b Uploaded
molson
parents:
diff changeset
237
944bdd58233b Uploaded
molson
parents:
diff changeset
238 # Copy output files from tmp directory to specified files.
944bdd58233b Uploaded
molson
parents:
diff changeset
239 try:
944bdd58233b Uploaded
molson
parents:
diff changeset
240 try:
944bdd58233b Uploaded
molson
parents:
diff changeset
241 if options.isoforms_fpkm_tracking_output and os.path.exists(os.path.join( cuffdatadir, "isoforms.fpkm_tracking" )):
944bdd58233b Uploaded
molson
parents:
diff changeset
242 shutil.copyfile( os.path.join( cuffdatadir, "isoforms.fpkm_tracking" ), options.isoforms_fpkm_tracking_output )
944bdd58233b Uploaded
molson
parents:
diff changeset
243 if options.genes_fpkm_tracking_output and os.path.exists(os.path.join( cuffdatadir, "genes.fpkm_tracking" )):
944bdd58233b Uploaded
molson
parents:
diff changeset
244 shutil.copyfile( os.path.join( cuffdatadir, "genes.fpkm_tracking" ), options.genes_fpkm_tracking_output )
944bdd58233b Uploaded
molson
parents:
diff changeset
245 if options.cds_fpkm_tracking_output and os.path.exists(os.path.join( cuffdatadir, "cds.fpkm_tracking" )):
944bdd58233b Uploaded
molson
parents:
diff changeset
246 shutil.copyfile( os.path.join( cuffdatadir, "cds.fpkm_tracking" ), options.cds_fpkm_tracking_output )
944bdd58233b Uploaded
molson
parents:
diff changeset
247 if options.tss_groups_fpkm_tracking_output and os.path.exists(os.path.join( cuffdatadir, "tss_groups.fpkm_tracking" )):
944bdd58233b Uploaded
molson
parents:
diff changeset
248 shutil.copyfile( os.path.join( cuffdatadir, "tss_groups.fpkm_tracking" ), options.tss_groups_fpkm_tracking_output )
944bdd58233b Uploaded
molson
parents:
diff changeset
249
944bdd58233b Uploaded
molson
parents:
diff changeset
250 if options.isoforms_read_group_tracking_output and os.path.exists(os.path.join( cuffdatadir, "isoforms.read_group_tracking" )):
944bdd58233b Uploaded
molson
parents:
diff changeset
251 shutil.copyfile( os.path.join( cuffdatadir, "isoforms.read_group_tracking" ), options.isoforms_read_group_tracking_output )
944bdd58233b Uploaded
molson
parents:
diff changeset
252 if options.genes_read_group_tracking_output and os.path.exists(os.path.join( cuffdatadir, "genes.read_group_tracking" )):
944bdd58233b Uploaded
molson
parents:
diff changeset
253 shutil.copyfile( os.path.join( cuffdatadir, "genes.read_group_tracking" ), options.genes_read_group_tracking_output )
944bdd58233b Uploaded
molson
parents:
diff changeset
254 if options.cds_read_group_tracking_output and os.path.exists(os.path.join( cuffdatadir, "cds.read_group_tracking" )):
944bdd58233b Uploaded
molson
parents:
diff changeset
255 shutil.copyfile( os.path.join( cuffdatadir, "cds.read_group_tracking" ), options.cds_read_group_tracking_output )
944bdd58233b Uploaded
molson
parents:
diff changeset
256 if options.tss_groups_read_group_tracking_output and os.path.exists(os.path.join( cuffdatadir, "tss_groups.read_group_tracking" )):
944bdd58233b Uploaded
molson
parents:
diff changeset
257 shutil.copyfile( os.path.join( cuffdatadir, "tss_groups.read_group_tracking" ), options.tss_groups_read_group_tracking_output )
944bdd58233b Uploaded
molson
parents:
diff changeset
258
944bdd58233b Uploaded
molson
parents:
diff changeset
259 if options.isoforms_exp_output and os.path.exists(os.path.join( cuffdatadir, "isoform_exp.diff" )):
944bdd58233b Uploaded
molson
parents:
diff changeset
260 shutil.copyfile( os.path.join( cuffdatadir, "isoform_exp.diff" ), options.isoforms_exp_output )
944bdd58233b Uploaded
molson
parents:
diff changeset
261 if options.genes_exp_output and os.path.exists(os.path.join( cuffdatadir, "gene_exp.diff" )):
944bdd58233b Uploaded
molson
parents:
diff changeset
262 shutil.copyfile( os.path.join( cuffdatadir, "gene_exp.diff" ), options.genes_exp_output )
944bdd58233b Uploaded
molson
parents:
diff changeset
263 if options.cds_exp_fpkm_tracking_output and os.path.exists(os.path.join( cuffdatadir, "cds_exp.diff" )):
944bdd58233b Uploaded
molson
parents:
diff changeset
264 shutil.copyfile( os.path.join( cuffdatadir, "cds_exp.diff" ), options.cds_exp_fpkm_tracking_output )
944bdd58233b Uploaded
molson
parents:
diff changeset
265 if options.tss_groups_exp_output and os.path.exists(os.path.join( cuffdatadir, "tss_group_exp.diff" )):
944bdd58233b Uploaded
molson
parents:
diff changeset
266 shutil.copyfile( os.path.join( cuffdatadir, "tss_group_exp.diff" ), options.tss_groups_exp_output )
944bdd58233b Uploaded
molson
parents:
diff changeset
267
944bdd58233b Uploaded
molson
parents:
diff changeset
268 if options.isoforms_count_tracking_output and os.path.exists(os.path.join( cuffdatadir, "isoforms.count_tracking" )):
944bdd58233b Uploaded
molson
parents:
diff changeset
269 shutil.copyfile( os.path.join( cuffdatadir, "isoforms.count_tracking" ), options.isoforms_count_tracking_output )
944bdd58233b Uploaded
molson
parents:
diff changeset
270 if options.genes_count_tracking_output and os.path.exists(os.path.join( cuffdatadir, "genes.count_tracking" )):
944bdd58233b Uploaded
molson
parents:
diff changeset
271 shutil.copyfile( os.path.join( cuffdatadir, "genes.count_tracking" ), options.genes_count_tracking_output )
944bdd58233b Uploaded
molson
parents:
diff changeset
272 if options.cds_count_tracking_output and os.path.exists(os.path.join( cuffdatadir, "cds.count_tracking" )):
944bdd58233b Uploaded
molson
parents:
diff changeset
273 shutil.copyfile( os.path.join( cuffdatadir, "cds.count_tracking" ), options.cds_count_tracking_output )
944bdd58233b Uploaded
molson
parents:
diff changeset
274 if options.tss_groups_count_tracking_output and os.path.exists(os.path.join( cuffdatadir, "tss_groups.count_tracking" )):
944bdd58233b Uploaded
molson
parents:
diff changeset
275 shutil.copyfile( os.path.join( cuffdatadir, "tss_groups.count_tracking" ), options.tss_groups_count_tracking_output )
944bdd58233b Uploaded
molson
parents:
diff changeset
276
944bdd58233b Uploaded
molson
parents:
diff changeset
277 if options.cds_diff_output and os.path.exists(os.path.join( cuffdatadir, "cds.diff" )):
944bdd58233b Uploaded
molson
parents:
diff changeset
278 shutil.copyfile( os.path.join( cuffdatadir, "cds.diff" ), options.cds_diff_output )
944bdd58233b Uploaded
molson
parents:
diff changeset
279
944bdd58233b Uploaded
molson
parents:
diff changeset
280 if options.splicing_diff_output and os.path.exists(os.path.join( cuffdatadir, "splicing.diff" )):
944bdd58233b Uploaded
molson
parents:
diff changeset
281 shutil.copyfile( os.path.join( cuffdatadir, "splicing.diff" ), options.splicing_diff_output )
944bdd58233b Uploaded
molson
parents:
diff changeset
282 if options.promoters_diff_output and os.path.exists(os.path.join( cuffdatadir, "promoters.diff" )):
944bdd58233b Uploaded
molson
parents:
diff changeset
283 shutil.copyfile( os.path.join( cuffdatadir, "promoters.diff" ), options.promoters_diff_output )
944bdd58233b Uploaded
molson
parents:
diff changeset
284
944bdd58233b Uploaded
molson
parents:
diff changeset
285 if options.run_info_output and os.path.exists(os.path.join( cuffdatadir, "run.info" )):
944bdd58233b Uploaded
molson
parents:
diff changeset
286 shutil.copyfile( os.path.join( cuffdatadir, "run.info" ), options.run_info_output )
944bdd58233b Uploaded
molson
parents:
diff changeset
287 if options.read_groups_info_output and os.path.exists(os.path.join( cuffdatadir, "read_groups.info" )):
944bdd58233b Uploaded
molson
parents:
diff changeset
288 shutil.copyfile( os.path.join( cuffdatadir, "read_groups.info" ), options.read_groups_info_output )
944bdd58233b Uploaded
molson
parents:
diff changeset
289
944bdd58233b Uploaded
molson
parents:
diff changeset
290 except Exception, e:
944bdd58233b Uploaded
molson
parents:
diff changeset
291 stop_err( 'Error in cuffdiff:\n' + str( e ) )
944bdd58233b Uploaded
molson
parents:
diff changeset
292 if options.cummeRbund_db_output:
944bdd58233b Uploaded
molson
parents:
diff changeset
293 try:
944bdd58233b Uploaded
molson
parents:
diff changeset
294 dbFile = 'cuffData.db'
944bdd58233b Uploaded
molson
parents:
diff changeset
295 rscript = tempfile.NamedTemporaryFile( dir=tmp_output_dir,suffix='.r' ).name
944bdd58233b Uploaded
molson
parents:
diff changeset
296 rscript_fh = open( rscript, 'wb' )
944bdd58233b Uploaded
molson
parents:
diff changeset
297 rscript_fh.write('library(cummeRbund)\n')
944bdd58233b Uploaded
molson
parents:
diff changeset
298 if options.inputA and options.ref_file:
944bdd58233b Uploaded
molson
parents:
diff changeset
299 rscript_fh.write('cuff<-readCufflinks(dir = "%s", dbFile = "%s", gtfFile = "%s", genome = "%s", rebuild = T)\n' % (cuffdatadir,dbFile,options.inputA,options.ref_file))
944bdd58233b Uploaded
molson
parents:
diff changeset
300 else:
944bdd58233b Uploaded
molson
parents:
diff changeset
301 rscript_fh.write('cuff<-readCufflinks(dir = "%s", dbFile = "%s", rebuild = T)\n' % (cuffdatadir,dbFile))
944bdd58233b Uploaded
molson
parents:
diff changeset
302 rscript_fh.close()
944bdd58233b Uploaded
molson
parents:
diff changeset
303 cmd = ( "Rscript --vanilla %s" % rscript )
944bdd58233b Uploaded
molson
parents:
diff changeset
304 tmp_name = tempfile.NamedTemporaryFile( dir=tmp_output_dir ).name
944bdd58233b Uploaded
molson
parents:
diff changeset
305 tmp_stderr = open( tmp_name, 'wb' )
944bdd58233b Uploaded
molson
parents:
diff changeset
306 proc = subprocess.Popen( args=cmd, shell=True, stderr=tmp_stderr.fileno() )
944bdd58233b Uploaded
molson
parents:
diff changeset
307 #proc = subprocess.Popen( args=cmd, shell=True)
944bdd58233b Uploaded
molson
parents:
diff changeset
308 returncode = proc.wait()
944bdd58233b Uploaded
molson
parents:
diff changeset
309 tmp_stderr.close()
944bdd58233b Uploaded
molson
parents:
diff changeset
310 if os.path.exists(os.path.join( cuffdatadir, dbFile )):
944bdd58233b Uploaded
molson
parents:
diff changeset
311 shutil.copyfile( os.path.join( cuffdatadir, dbFile ), options.cummeRbund_db_output )
944bdd58233b Uploaded
molson
parents:
diff changeset
312 shutil.rmtree(os.path.join( cuffdatadir, dbFile ))
944bdd58233b Uploaded
molson
parents:
diff changeset
313 except Exception, e:
944bdd58233b Uploaded
molson
parents:
diff changeset
314 stop_err( 'Error generating cummeRbund cuffData.db:\n' + str( e ) )
944bdd58233b Uploaded
molson
parents:
diff changeset
315 finally:
944bdd58233b Uploaded
molson
parents:
diff changeset
316 # Clean up temp dirs
944bdd58233b Uploaded
molson
parents:
diff changeset
317 if os.path.exists( tmp_output_dir ):
944bdd58233b Uploaded
molson
parents:
diff changeset
318 shutil.rmtree( tmp_output_dir )
944bdd58233b Uploaded
molson
parents:
diff changeset
319
944bdd58233b Uploaded
molson
parents:
diff changeset
320 if __name__=="__main__": __main__()