annotate cufflinks_wrapper.py @ 0:ae8ad9d17e34 draft

Uploaded
author geert-vandeweyer
date Fri, 01 Aug 2014 07:45:21 -0400
parents
children 261425bba55d
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
1 #!/usr/bin/env python
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
2
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
3 # Supports Cufflinks versions 1.3 and newer (updated based on 2.2.1).
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
4
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
5 import optparse, os, shutil, subprocess, sys, tempfile
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
6 from galaxy import eggs
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
7 from galaxy.datatypes.util.gff_util import parse_gff_attributes, gff_attributes_to_str
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
8
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
def stop_err(msg):
    """Report a fatal error and terminate the wrapper.

    Writes ``msg`` followed by a newline to standard error, then exits the
    process with status 1 so that callers inspecting the exit code (and not
    only stderr) also see the failure.

    Raises:
        SystemExit: always, with exit code 1.
    """
    sys.stderr.write("%s\n" % msg)
    # A bare sys.exit() would report status 0 (success) despite the error.
    sys.exit(1)
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
12
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
def _build_option_parser():
    """Return the optparse parser describing every option the wrapper accepts."""
    parser = optparse.OptionParser()
    parser.add_option('-1', '--input', dest='input', help=' file of RNA-Seq read alignments in the SAM format. SAM is a standard short read alignment, that allows aligners to attach custom tags to individual alignments, and Cufflinks requires that the alignments you supply have some of these tags. Please see Input formats for more details.')
    parser.add_option('-I', '--max-intron-length', dest='max_intron_len', help='The maximum intron length. Cufflinks will not report transcripts with introns longer than this, and will ignore SAM alignments with REF_SKIP CIGAR operations longer than this. The default is 300,000.')
    parser.add_option('-F', '--min-isoform-fraction', dest='min_isoform_fraction', help='After calculating isoform abundance for a gene, Cufflinks filters out transcripts that it believes are very low abundance, because isoforms expressed at extremely low levels often cannot reliably be assembled, and may even be artifacts of incompletely spliced precursors of processed transcripts. This parameter is also used to filter out introns that have far fewer spliced alignments supporting them. The default is 0.05, or 5% of the most abundant isoform (the major isoform) of the gene.')
    parser.add_option('-j', '--pre-mrna-fraction', dest='pre_mrna_fraction', help='Some RNA-Seq protocols produce a significant amount of reads that originate from incompletely spliced transcripts, and these reads can confound the assembly of fully spliced mRNAs. Cufflinks uses this parameter to filter out alignments that lie within the intronic intervals implied by the spliced alignments. The minimum depth of coverage in the intronic region covered by the alignment is divided by the number of spliced reads, and if the result is lower than this parameter value, the intronic alignments are ignored. The default is 5%.')
    parser.add_option('-p', '--num-threads', dest='num_threads', help='Use this many threads to align reads. The default is 1.')
    parser.add_option('-G', '--GTF', dest='GTF', help='Tells Cufflinks to use the supplied reference annotation to estimate isoform expression. It will not assemble novel transcripts, and the program will ignore alignments not structurally compatible with any reference transcript.')
    parser.add_option("--compatible-hits-norm", dest='compatible_hits_norm', help='Count hits compatible with reference RNAs only')
    parser.add_option('-g', '--GTF-guide', dest='GTFguide', help='use reference transcript annotation to guide assembly')
    # dest must be a valid Python identifier to be readable as options.<dest>;
    # a name starting with a digit cannot be used in attribute syntax.
    parser.add_option("--3-overhang-tolerance", dest='three_overhang_tolerance', help='The number of bp allowed to overhang the 3prime end of a reference transcript when determining if an assembled transcript should be merged with it (ie, the assembled transcript is not novel). The default is 600 bp.')
    parser.add_option("--intron-overhang-tolerance", dest='intron_overhang_tolerance', help='The number of bp allowed to enter the intron of a reference transcript when determining if an assembled transcript should be merged with it (ie, the assembled transcript is not novel). The default is 50 bp.')
    parser.add_option("--no-faux-reads", dest='no_faux_reads', help='This option disables tiling of the reference transcripts with faux reads. Use this if you only want to use sequencing reads in assembly but do not want to output assembled transcripts that lay within reference transcripts. All reference transcripts in the input annotation will also be included in the output.')
    parser.add_option('-u', '--multi-read-correct', dest='multi_read_correct', action="store_true", help='Tells Cufflinks to do an initial estimation procedure to more accurately weight reads mapping to multiple locations in the genome')

    # Normalization options.
    parser.add_option("--no-effective-length-correction", dest="no_effective_length_correction", action="store_true")
    parser.add_option("--no-length-correction", dest="no_length_correction", action="store_true")

    # Wrapper / Galaxy options.
    parser.add_option('-A', '--assembled-isoforms-output', dest='assembled_isoforms_output_file', help='Assembled isoforms output file; formate is GTF.')

    # Advanced Options:
    parser.add_option("--library-type", dest="library_type", help=' library prep used for input reads, default fr-unstranded')
    parser.add_option('-M', '--mask-file', dest='mask_file',
                      help='Tells Cufflinks to ignore all reads that could have come from transcripts in this GTF file. '
                           'We recommend including any annotated rRNA, mitochondrial transcripts other abundant transcripts '
                           'you wish to ignore in your analysis in this file. Due to variable efficiency of mRNA enrichment '
                           'methods and rRNA depletion kits, masking these transcripts often improves the overall robustness '
                           'of transcript abundance estimates.')
    # cufflinks: --frag-len-mean
    parser.add_option('-m', '--inner-mean-dist', dest='inner_mean_dist',
                      help='This is the expected (mean) inner distance between mate pairs. '
                           'For, example, for paired end runs with fragments selected at 300bp, '
                           'where each end is 50bp, you should set -r to be 200. The default is 45bp.')
    # cufflinks: --frag-len-std-dev
    parser.add_option('-s', '--inner-dist-std-dev', dest='inner_dist_std_dev', help='The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.')
    parser.add_option('--max-mle-iterations', dest='max_mle_iterations', help='Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000')
    parser.add_option('--junc-alpha', dest='junc_alpha', help='Alpha value for the binomial test used during false positive spliced alignment filtration. Default: 0.001')
    parser.add_option('--small-anchor-fraction', dest='small_anchor_fraction',
                      help='Spliced reads with less than this percent of their length on each side of '
                           'the junction are considered suspicious and are candidates for filtering prior to assembly. Default: 0.09.')
    parser.add_option('--overhang-tolerance', dest='overhang_tolerance',
                      help='The number of bp allowed to enter the intron of a transcript when determining if a '
                           'read or another transcript is mappable to/compatible with it. The default is 8 bp based on the default bowtie/TopHat parameters.')
    parser.add_option('--max-bundle-length', dest='max_bundle_length', help='Maximum genomic length of a given bundle. Default: 3,500,000bp')
    parser.add_option('--max-bundle-frags', dest='max_bundle_frags', help='Sets the maximum number of fragments a locus may have before being skipped. Skipped loci are listed in skipped.gtf. Default: 1,000,000')
    parser.add_option('--min-intron-length', dest='min_intron_length', help='Minimal allowed intron size. Default: 50')
    parser.add_option('--trim-3-avgcov-thresh', dest='trim_3_avgcov_thresh', help='Minimum average coverage required to attempt 3prime trimming. Default: 10')
    parser.add_option('--trim-3-dropoff-frac', dest='trim_3_dropoff_frac', help='The fraction of average coverage below which to trim the 3prime end of an assembled transcript. Default: 0.1')

    # Bias correction options.
    parser.add_option('-b', dest='do_bias_correction', action="store_true", help='Providing Cufflinks with a multifasta file via this option instructs it to run our new bias detection and correction algorithm which can significantly improve accuracy of transcript abundance estimates.')
    parser.add_option('--index', dest='index', help='The path of the reference genome')
    parser.add_option('--ref_file', dest='ref_file', help='The reference dataset from the history')

    # Global model (for trackster).
    parser.add_option('--global_model', dest='global_model_file', help='Global model used for computing on local data')
    return parser


def _report_cufflinks_version():
    """Write the Cufflinks version banner (or a fallback notice) to stdout.

    Runs ``cufflinks --no-update-check`` with stderr merged into stdout and
    prints the first output line containing ``cufflinks v``. This is purely
    informational, so any failure to run or parse is reported as
    'Could not determine Cufflinks version' instead of aborting.
    """
    version = None
    try:
        proc = subprocess.Popen(['cufflinks', '--no-update-check'],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,
                                universal_newlines=True)
        output = proc.communicate()[0]
        for line in output.splitlines():
            if 'cufflinks v' in line.lower():
                version = line.strip()
                break
    except Exception:
        # Best effort only; unlike a bare except this lets SystemExit and
        # KeyboardInterrupt propagate.
        version = None
    if version:
        sys.stdout.write('%s\n' % version)
    else:
        sys.stdout.write('Could not determine Cufflinks version\n')


def _build_command(options, seq_path):
    """Return the cufflinks invocation as an argument list.

    Args:
        options: parsed optparse values from ``_build_option_parser``.
        seq_path: sequence file passed to ``-b``; only read when
            ``options.do_bias_correction`` is set.

    An argument list (rather than a shell string) is used so that file paths
    containing spaces or shell metacharacters are passed through verbatim.
    """
    # Base; always use quiet mode to avoid problems with storing log output.
    cmd = ['cufflinks', '-q', '--no-update-check']

    def add_int(flag, raw):
        if raw:
            cmd.extend([flag, '%i' % int(raw)])

    def add_float(flag, raw):
        if raw:
            cmd.extend([flag, '%f' % float(raw)])

    def add_str(flag, raw):
        if raw:
            cmd.extend([flag, raw])

    def add_switch(flag, raw):
        if raw:
            cmd.append(flag)

    add_int('-I', options.max_intron_len)
    add_float('-F', options.min_isoform_fraction)
    add_float('-j', options.pre_mrna_fraction)
    add_int('-p', options.num_threads)
    add_str('-G', options.GTF)
    add_switch('--compatible-hits-norm', options.compatible_hits_norm)
    if options.GTFguide:
        cmd.extend(['-g', options.GTFguide])
        # NOTE(review): the tolerances are assumed to belong to guided
        # assembly (-g) only -- confirm. They are emitted only when a value
        # was supplied, instead of failing on int(None).
        add_int('--3-overhang-tolerance', options.three_overhang_tolerance)
        add_int('--intron-overhang-tolerance', options.intron_overhang_tolerance)
    add_switch('--no-faux-reads', options.no_faux_reads)
    add_switch('-u', options.multi_read_correct)

    if options.library_type and options.library_type != 'auto':
        cmd.extend(['--library-type', options.library_type])
    add_str('--mask-file', options.mask_file)
    add_int('-m', options.inner_mean_dist)
    add_int('-s', options.inner_dist_std_dev)
    add_int('--max-mle-iterations', options.max_mle_iterations)
    add_float('--junc-alpha', options.junc_alpha)
    add_float('--small-anchor-fraction', options.small_anchor_fraction)
    add_int('--overhang-tolerance', options.overhang_tolerance)
    add_int('--max-bundle-length', options.max_bundle_length)
    add_int('--max-bundle-frags', options.max_bundle_frags)
    add_int('--min-intron-length', options.min_intron_length)
    add_int('--trim-3-avgcov-thresh', options.trim_3_avgcov_thresh)
    add_float('--trim-3-dropoff-frac', options.trim_3_dropoff_frac)

    if options.do_bias_correction:
        cmd.extend(['-b', seq_path])
    add_switch('--no-effective-length-correction', options.no_effective_length_correction)
    add_switch('--no-length-correction', options.no_length_correction)

    # Add input files.
    cmd.append(options.input)
    return cmd


def _read_total_map_mass(stderr_text):
    """Return the map mass / upper quartile value found in cufflinks' stderr.

    The value is taken from the text after the first ':' on the first line
    mentioning "map mass" or "upper quartile" (case-insensitive). Returns
    None when no such line exists.
    """
    for line in stderr_text.splitlines():
        lowered = line.lower()
        if 'map mass' in lowered or 'upper quartile' in lowered:
            return float(line.split(':')[1].strip())
    return None


def _rescale_transcripts(gtf_path, ratio):
    """Multiply FPKM, conf_lo and conf_hi by ``ratio`` in every record of ``gtf_path``.

    The rescaled records are written to a temporary file in the working
    directory which then replaces ``gtf_path``; the temporary file is always
    removed afterwards. Blank lines and '#' comment lines are copied through
    unchanged.
    """
    fd, tmp_path = tempfile.mkstemp(dir='.')
    try:
        with os.fdopen(fd, 'w') as rescaled:
            with open(gtf_path, 'r') as transcripts:
                for line in transcripts:
                    if not line.strip() or line.startswith('#'):
                        rescaled.write(line)
                        continue
                    fields = line.rstrip('\r\n').split('\t')
                    attrs = parse_gff_attributes(fields[8])
                    for key in ('FPKM', 'conf_lo', 'conf_hi'):
                        attrs[key] = str(float(attrs[key]) * ratio)
                    fields[8] = gff_attributes_to_str(attrs, 'GTF')
                    rescaled.write('%s\n' % '\t'.join(fields))
        shutil.copyfile(tmp_path, gtf_path)
    finally:
        os.remove(tmp_path)


def __main__():
    """Galaxy wrapper entry point: parse options, run cufflinks, collect outputs.

    Steps:
      1. Parse the command line.
      2. Print the Cufflinks version (best effort).
      3. If bias correction is requested, resolve the sequence file (symlink
         the history dataset to ``ref.fa`` or use the indexed genome path).
      4. Build and run the cufflinks command, capturing its stderr.
      5. Extract the total map mass from stderr; if a global model file is
         given, rescale FPKM values in ``transcripts.gtf`` accordingly.
      6. Copy ``transcripts.gtf`` to the requested output and write
         ``global_model.txt`` when a map mass was found.

    Any failure while running cufflinks or handling its output is reported
    through ``stop_err`` together with the captured stderr.
    """
    # Parse Command Line
    parser = _build_option_parser()
    options, _ = parser.parse_args()

    if not options.input:
        stop_err('No input alignment file specified (-1/--input).')

    # output version # of tool
    _report_cufflinks_version()

    # If doing bias correction, set/link to sequence file.
    seq_path = None
    if options.do_bias_correction:
        if options.ref_file:
            # Sequence data from history.
            # Create symbolic link to ref_file so that index will be created in working directory.
            seq_path = "ref.fa"
            os.symlink(options.ref_file, seq_path)
        else:
            if not options.index or not os.path.exists(options.index):
                stop_err('Reference genome %s not present, request it by reporting this error.' % options.index)
            seq_path = options.index

    # Build command.
    cmd = _build_command(options, seq_path)

    # Debugging.
    sys.stdout.write('%s\n' % ' '.join(cmd))

    #
    # Run command and handle output.
    #
    stderr_text = ''
    try:
        # Keep the temporary file object alive while cufflinks writes to it;
        # it is deleted automatically when the with-block exits.
        with tempfile.NamedTemporaryFile(mode='w+', dir='.') as stderr_file:
            returncode = subprocess.call(cmd, stderr=stderr_file)
            stderr_file.seek(0)
            stderr_text = stderr_file.read()

        # Error checking.
        if returncode != 0:
            raise Exception("return code = %i" % returncode)

        # Read standard error to get total map/upper quartile mass.
        total_map_mass = _read_total_map_mass(stderr_text)

        #
        # If there's a global model provided, use model's total map mass
        # to adjust FPKM + confidence intervals.
        #
        if options.global_model_file:
            # Global model is simply total map mass from original run.
            with open(options.global_model_file, 'r') as global_model_file:
                global_model_total_map_mass = float(global_model_file.readline())

            if total_map_mass is None:
                # Without a map mass the ratio would be meaningless (the old
                # sentinel of -1 produced negative FPKM values).
                raise Exception("could not determine total map mass from Cufflinks output")

            # Ratio of global model's total map mass to original run's map mass is
            # factor used to adjust FPKM.
            fpkm_map_mass_ratio = total_map_mass / global_model_total_map_mass

            # Update FPKM values in transcripts.gtf file.
            _rescale_transcripts("transcripts.gtf", fpkm_map_mass_ratio)

            # TODO: update expression files as well.

        # Set outputs. Transcript and gene expression handled by wrapper directives.
        shutil.copyfile("transcripts.gtf", options.assembled_isoforms_output_file)
        if total_map_mass is not None:
            with open("global_model.txt", 'w') as model_out:
                model_out.write("%f\n" % total_map_mass)
    except Exception as e:
        # Report the failure together with whatever cufflinks wrote to stderr.
        stop_err('Error running cufflinks.\n%s\n%s' % (str(e), stderr_text))
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
252
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
# Run the wrapper only when this file is executed as a script.
if __name__ == "__main__":
    __main__()