# HG changeset patch
# User tmcgowan
# Date 1410537350 14400
# Node ID d91b89b552f33d6e5ec3c46be1f8568eae37e478
# Imported from capsule None
#
# diff -r 000000000000 -r d91b89b552f3 rgFastQC.py
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/rgFastQC.py Fri Sep 12 11:55:50 2014 -0400
# @@ -0,0 +1,66 @@
"""
Rewrite of rgFastQC.py for v. 0.11.2 of FastQC

Command-line wrapper: symlinks the input dataset into the working directory
under a sanitised name, runs the FastQC executable on that link and copies
the HTML report it produces to the requested output path.
"""
import re
import os
import sys
import subprocess
import optparse
import shutil
import tempfile
import zipfile
import gzip
import glob


class FastQCRunner(object):
    """Run FastQC on a single input file and collect its HTML report.

    ``opts`` is an options object (such as the one ``optparse`` returns)
    exposing the attributes ``input``, ``inputfilename``, ``htmloutput``,
    ``outputdir``, ``contaminants`` and ``executable``.
    """

    def __init__(self, opts=None):
        """Store the options object.

        Raises:
            ValueError: if ``opts`` is None. An explicit raise is used
                instead of ``assert`` so the check survives ``python -O``.
        """
        if opts is None:
            raise ValueError('FastQCRunner requires an options object')
        self.opts = opts

    def prepare_command_line(self):
        """Compute the sanitised input name and the FastQC command.

        Sets three attributes:

        * ``fastqinfilename`` - base name of the input with every character
          outside ``[a-zA-Z0-9_.-]`` replaced by ``_``;
        * ``command_args`` - the command as an argument list, which is what
          actually gets executed;
        * ``command_line`` - the same arguments joined with spaces, kept for
          logging.
        """
        # Fall back to the real input path when no display name was given.
        source_name = self.opts.inputfilename or self.opts.input
        self.fastqinfilename = re.sub(
            r'[^a-zA-Z0-9_\-\.]', '_', os.path.basename(source_name))
        # Read everything from self.opts: a module-level ``opts`` only exists
        # when this file is executed as a script.
        args = [self.opts.executable, '--outdir', self.opts.outputdir]
        if self.opts.contaminants is not None:
            args.extend(['--contaminants', self.opts.contaminants])
        args.extend(['--quiet', self.fastqinfilename])
        self.command_args = args
        self.command_line = ' '.join(args)

    def copy_working_file_to_dataset(self):
        """Copy the generated HTML report to ``opts.htmloutput``.

        The report is looked up in ``opts.outputdir``, the directory passed
        to FastQC through ``--outdir``. When several files match, the first
        one in sorted order is used.

        Raises:
            IOError: if no file matching ``*html`` exists in the output
                directory.
        """
        pattern = os.path.join(self.opts.outputdir, '*html')
        reports = sorted(glob.glob(pattern))
        if not reports:
            raise IOError(
                'FastQC produced no HTML report matching %s' % pattern)
        # shutil raises on failure, unlike os.system('cp ...'), whose exit
        # status was never checked.
        shutil.copyfile(reports[0], self.opts.htmloutput)

    def run_fastqc(self):
        """Run the whole job: log, symlink, execute FastQC, copy the report.

        Progress messages are written to a ``rgFastQC*.log`` file created in
        ``opts.outputdir``.

        Raises:
            subprocess.CalledProcessError: if FastQC exits with a non-zero
                status.
            OSError: if the symlink cannot be created or the executable
                cannot be started.
        """
        log_fd, log_path = tempfile.mkstemp(
            prefix='rgFastQC', suffix=".log", dir=self.opts.outputdir)
        # Wrap the descriptor returned by mkstemp rather than opening the
        # path a second time, so it is closed on every exit path.
        with os.fdopen(log_fd, 'w') as sout:
            self.prepare_command_line()
            sout.write(self.command_line)
            sout.write('\n')
            sout.write("Creating symlink\n")
            os.symlink(self.opts.input, self.fastqinfilename)
            sout.write("check_call\n")
            # An argument list with no shell keeps spaces and shell
            # metacharacters in paths from being reinterpreted.
            subprocess.check_call(self.command_args)
            sout.write("Copying working %s file to %s \n"
                       % (self.fastqinfilename, self.opts.htmloutput))
            self.copy_working_file_to_dataset()
            sout.write("Finished")


def main():
    """Parse the command line and run FastQC once."""
    op = optparse.OptionParser()
    op.add_option('-i', '--input', default=None)
    op.add_option('-j', '--inputfilename', default=None)
    op.add_option('-o', '--htmloutput', default=None)
    op.add_option('-d', '--outputdir', default="/tmp/shortread")
    op.add_option('-f', '--informat', default='fastq')
    op.add_option('-n', '--namejob', default='rgFastQC')
    op.add_option('-c', '--contaminants', default=None)
    op.add_option('-e', '--executable', default='fastqc')
    opts, args = op.parse_args()
    # Report missing mandatory options as a usage error instead of failing
    # later with a traceback.
    if opts.input is None:
        op.error('an input file is required (-i/--input)')
    if opts.htmloutput is None:
        op.error('an HTML output path is required (-o/--htmloutput)')

    fastqc_runner = FastQCRunner(opts)
    fastqc_runner.run_fastqc()


if __name__ == '__main__':
    main()

# ---------------------------------------------------------------------------
# Remainder of the changeset (rgFastQC.xml and tool_dependencies.xml), kept
# verbatim as it appears in this file. Bare "+" entries are added patch lines
# that carry no text here.
# ---------------------------------------------------------------------------
# \ No newline at end of file
# diff -r 000000000000 -r d91b89b552f3 rgFastQC.xml
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/rgFastQC.xml Fri Sep 12 11:55:50 2014 -0400
# @@ -0,0 +1,101 @@
# +
# + reports using FastQC
# +
# + rgFastQC.py -i "$input_file" -d . -o "$html_file" -n "$out_prefix" -f "$input_file.ext" -j "$input_file.name" -e "\$JAVA_JAR_PATH/fastqc"
# +#if $contaminants.dataset and str($contaminants) > ''
# +-c "$contaminants"
# +#end if
# +
# +
# + FastQC
# + + + + + + + + + + + + + + + + + + + + + + +
# +.. class:: infomark
# +
# +**Purpose**
# +
# +FastQC aims to provide a simple way to do some quality control checks on raw
# +sequence data coming from high throughput sequencing pipelines.
# +It provides a modular set of analyses which you can use to give a quick
# +impression of whether your data has any problems of
# +which you should be aware before doing any further analysis.
# +
# +The main functions of FastQC are:
# +
# +- Import of data from BAM, SAM or FastQ files (any variant)
# +- Providing a quick overview to tell you in which areas there may be problems
# +- Summary graphs and tables to quickly assess your data
# +- Export of results to an HTML based permanent report
# +- Offline operation to allow automated generation of reports without running the interactive application
# +
# +
# +-----
# +
# +
# +.. class:: infomark
# +
# +**FastQC**
# +
# +This is a Galaxy wrapper. It merely exposes the external package FastQC_ which is documented at FastQC_
# +Kindly acknowledge it as well as this tool if you use it.
# +FastQC incorporates the Picard-tools_ libraries for sam/bam processing.
# +
# +The contaminants file parameter was borrowed from the independently developed
# +fastqcwrapper contributed to the Galaxy Community Tool Shed by J. Johnson.
# +
# +-----
# +
# +.. class:: infomark
# +
# +**Inputs and outputs**
# +
# +FastQC_ is the best place to look for documentation - it's very good.
# +A summary follows below for those in a tearing hurry.
# +
# +This wrapper will accept a Galaxy fastq, sam or bam as the input read file to check.
# +It will also take an optional file containing a list of contaminants information, in the form of
# +a tab-delimited file with 2 columns, name and sequence.
# +
# +The tool produces a single HTML output file that contains all of the results, including the following:
# +
# +- Basic Statistics
# +- Per base sequence quality
# +- Per sequence quality scores
# +- Per base sequence content
# +- Per base GC content
# +- Per sequence GC content
# +- Per base N content
# +- Sequence Length Distribution
# +- Sequence Duplication Levels
# +- Overrepresented sequences
# +- Kmer Content
# +
# +All except Basic Statistics and Overrepresented sequences are plots.
# + .. _FastQC: http://www.bioinformatics.bbsrc.ac.uk/projects/fastqc/
# + .. _Picard-tools: http://picard.sourceforge.net/index.shtml
# + + +
# diff -r 000000000000 -r d91b89b552f3 tool_dependencies.xml
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/tool_dependencies.xml Fri Sep 12 11:55:50 2014 -0400
# @@ -0,0 +1,6 @@
# + + + + + +