Mercurial > repos > hackdna > fastqc
comparison fastqc_checker.py @ 0:d8d131d08779 draft default tip
Initial upload.
author | hackdna |
---|---|
date | Tue, 21 May 2013 11:48:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d8d131d08779 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 ''' | |
4 FastQC checker for Galaxy biomedical data analysis platform | |
5 | |
6 @author: Ilya Sytchev | |
7 | |
8 Input: one or more files in fastq format | |
9 Output: sequencing quality report in text format | |
10 | |
11 Requires FastQC 0.10.0 (http://www.bioinformatics.bbsrc.ac.uk/projects/fastqc/) | |
12 | |
13 Partially based on: | |
14 fastqcwrapper (http://toolshed.g2.bx.psu.edu/repos/jjohnson/fastqc) | |
15 rgFastQC (https://bitbucket.org/galaxy/galaxy-dist/src/tip/tools/rgenetics/rgFastQC.py) | |
16 | |
17 Tested with Python 2.6.1 and 2.7.2 on Mac OS 10.6.8 | |
18 ''' | |
19 | |
20 import sys, os, optparse, tempfile, shutil, subprocess | |
21 | |
def stop_err(msg, returncode=1):
    '''
    Report a fatal error and terminate the program.

    msg is written to standard error exactly as given (no newline is added);
    the interpreter then exits with status returncode.
    '''
    err_stream = sys.stderr
    err_stream.write(msg)
    # sys.exit() is a thin wrapper that raises SystemExit with this status
    raise SystemExit(returncode)
25 | |
def __main__():
    '''
    Run FastQC on the given input files and collect its verdicts in one report.

    Command line: -e/--executable is the location of the FastQC program,
    -o/--output is the report file to write, and the remaining arguments are
    the input files.

    FastQC writes its results into a temporary directory. For every input
    file the first two tab-separated columns of its summary.txt are copied
    into the report; if no summary.txt was produced, a note saying so is
    written instead. The temporary directory is removed before returning or
    exiting, including when FastQC fails.

    Exits via optparse (usage message, status 2) when arguments are missing
    or point to non-existent files, and via stop_err() when FastQC cannot be
    started or returns a non-zero status.
    '''
    usage = "Usage: %prog -e fastqc_executable -o output_file fastq_file [fastq_file ... ]"
    version = "%prog 1.0.0"
    op = optparse.OptionParser(usage=usage, version=version)
    op.add_option('-e', '--executable', dest="executable", help="location of the FastQC program")
    op.add_option('-o', '--output', dest="outfile", help="location of the output file")
    (options, infiles) = op.parse_args()

    # check if location of the FastQC program was provided
    if options.executable is None:
        op.error("Missing location of FastQC")

    # check if FastQC program exists at the provided location
    if not os.path.isfile(options.executable):
        op.error("Cannot find FastQC at %s" % options.executable)

    # check if any input files were provided
    # (optparse returns an empty list, never None, when there are none)
    if not infiles:
        op.error("Missing input files")

    # check if all input files exist
    for f in infiles:
        if not os.path.isfile(f):
            op.error("Cannot find input file %s" % f)

    # check if output file was provided
    if options.outfile is None:
        op.error("Missing output file name")

    tmpdir = tempfile.mkdtemp()  # create temp dir for FastQC output
    try:
        # assemble FastQC command line
        # list is more secure than string for subprocess call
        cmd = [options.executable, '-o', tmpdir]
        cmd.extend(infiles)

        # FastQC stdout and stderr are captured in files inside the private
        # temp dir so that they do not end up on this script's own streams
        tmp_stderr_name = os.path.join(tmpdir, 'fastqc_checker.err')
        tmp_stdout_name = os.path.join(tmpdir, 'fastqc_checker.out')
        with open(tmp_stderr_name, 'w') as tmp_stderr:
            with open(tmp_stdout_name, 'w') as tmp_stdout:
                # run FastQC
                try:
                    subprocess.check_call(cmd, stderr=tmp_stderr, stdout=tmp_stdout)
                except subprocess.CalledProcessError as e:
                    stop_err("Error executing FastQC\n", e.returncode)
                except OSError as e:
                    # the program could not be started at all
                    # (e.g. it is not executable)
                    stop_err("Error executing FastQC: %s\n" % e)

        # parse all summary.txt files produced by FastQC and write results into the output file
        with open(options.outfile, 'w') as outfile:
            for f in infiles:
                filename = os.path.basename(f)
                (datasetname, extension) = os.path.splitext(filename)
                # Need to account for FastQC removing .fastq extension from input file names
                # before using them to create output file and dir names
                # Alternative solution is to iterate over report directories instead of input file names
                if extension == '.fastq':
                    reportdir = datasetname + '_fastqc'
                else:
                    reportdir = filename + '_fastqc'
                summaryfilename = os.path.join(tmpdir, reportdir, 'summary.txt')
                outfile.write("%s results:\n" % datasetname)
                # if summary file exists, process and add results to the output file
                if os.path.isfile(summaryfilename):
                    with open(summaryfilename, 'r') as summaryfile:
                        for line in summaryfile:
                            fields = line.rstrip('\r\n').split('\t')
                            # skip blank or malformed lines instead of crashing on them
                            if len(fields) < 2:
                                continue
                            outfile.write(fields[0] + '\t' + fields[1] + '\n')
                else:
                    outfile.write("FastQC summary report was not found at %s.\n" % summaryfilename)
                outfile.write("\n")
    finally:
        # clean up temp dir on every exit path; errors are ignored so we don't
        # fail on stale nfs handles
        shutil.rmtree(tmpdir, ignore_errors=True)
108 | |
# Run the checker only when this file is executed as a script, not on import.
if __name__ == '__main__':
    __main__()