Mercurial > repos > tmcgowan > fastqc

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rgFastQC.py	Fri Sep 12 11:55:50 2014 -0400
@@ -0,0 +1,66 @@
+"""
+Rewrite of rgFastQC.py for v. 0.11.2 of FastQC
+
+"""
+import re
+import os
+import sys
+import subprocess
+import optparse
+import shutil
+import tempfile
+import zipfile
+import gzip
+import glob
+
+class FastQCRunner(object):
+    """Run FastQC on one input file and publish the HTML report it writes.
+
+    ``opts`` is the options object produced by the command line parser. The
+    attributes read by this class are ``input``, ``inputfilename``,
+    ``htmloutput``, ``outputdir``, ``contaminants`` and ``executable``.
+    """
+
+    def __init__(self, opts=None):
+        """Store the parsed options; ``opts`` is required despite its default."""
+        assert opts is not None, 'FastQCRunner needs the parsed options'
+        self.opts = opts
+
+    @staticmethod
+    def _shell_quote(value):
+        """Return ``value`` escaped so the shell treats it as a single word."""
+        # Imported here so the block does not depend on a new file-level import;
+        # ``shlex.quote`` is the Python 3 name, ``pipes.quote`` the Python 2 one.
+        try:
+            from shlex import quote
+        except ImportError:
+            from pipes import quote
+        return quote(value)
+
+    def prepare_command_line(self):
+        """Build the FastQC shell command and the sanitised working file name.
+
+        Sets ``self.fastqinfilename`` to the base name of
+        ``opts.inputfilename`` with every character other than letters, digits,
+        ``_``, ``-`` and ``.`` replaced by ``_``, and ``self.command_line`` to
+        the command string that ``run_fastqc`` hands to the shell.
+        """
+        # Read the options from the instance rather than from a module-level
+        # ``opts`` global, so the class also works when it is imported.
+        opts = self.opts
+        self.fastqinfilename = re.sub(r'[^a-zA-Z0-9_\-\.]', '_', os.path.basename(opts.inputfilename))
+        quote = self._shell_quote
+        # Paths are quoted because the command is run through a shell; the
+        # executable is left as given so a multi-word launcher keeps working.
+        command_line = [opts.executable, '--outdir %s' % quote(opts.outputdir)]
+        if opts.contaminants is not None:
+            command_line.append('--contaminants %s' % quote(opts.contaminants))
+        # The working file name needs no quoting: the substitution above leaves
+        # only shell-safe characters in it.
+        command_line.append('--quiet %s' % self.fastqinfilename)
+        self.command_line = ' '.join(command_line)
+
+    def copy_working_file_to_dataset(self):
+        """Copy the HTML report from ``opts.outputdir`` to ``opts.htmloutput``.
+
+        Raises:
+            IndexError: if ``opts.outputdir`` holds no file matching ``*html``
+                (the same exception type the empty glob result used to cause).
+        """
+        # FastQC is told to write into ``opts.outputdir`` (--outdir), so search
+        # there instead of the current directory; sorting makes the choice
+        # deterministic if several files match.
+        reports = sorted(glob.glob(os.path.join(self.opts.outputdir, '*html')))
+        if not reports:
+            raise IndexError('no FastQC HTML report found in %s' % self.opts.outputdir)
+        # shutil raises on failure, unlike the exit status of a shelled-out cp
+        # that was never checked, and needs no shell quoting.
+        shutil.copyfile(reports[0], self.opts.htmloutput)
+
+    def run_fastqc(self):
+        """Run FastQC on the input and copy its report to the output dataset.
+
+        A ``rgFastQC*.log`` progress log is written in ``opts.outputdir``.
+
+        Raises:
+            subprocess.CalledProcessError: if the FastQC command exits with a
+                non-zero status.
+        """
+        log_fd, _log_path = tempfile.mkstemp(prefix='rgFastQC', suffix=".log", dir=self.opts.outputdir)
+        # Wrap the descriptor mkstemp already opened (reopening the path leaked
+        # it) and let ``with`` close the log even when a step below raises.
+        with os.fdopen(log_fd, 'w') as sout:
+            self.prepare_command_line()
+            sout.write(self.command_line)
+            sout.write('\n')
+            sout.write("Creating symlink\n")
+            # Hand FastQC the input under the sanitised name built above. Only a
+            # leftover symlink is replaced; a real file of that name still
+            # makes os.symlink fail rather than being deleted.
+            if os.path.islink(self.fastqinfilename):
+                os.unlink(self.fastqinfilename)
+            os.symlink(self.opts.input, self.fastqinfilename)
+            sout.write("check_call\n")
+            # Flush so the log is complete on disk if FastQC fails or hangs.
+            sout.flush()
+            subprocess.check_call(self.command_line, shell=True)
+            sout.write("Copying working %s file to %s \n" % (self.fastqinfilename, self.opts.htmloutput))
+            self.copy_working_file_to_dataset()
+            sout.write("Finished")
+
+
+if __name__ == '__main__':
+    # Script entry point: declare the command line options from a table,
+    # parse them, and run FastQC once with the result.
+    parser = optparse.OptionParser()
+    option_table = (
+        ('-i', '--input', None),
+        ('-j', '--inputfilename', None),
+        ('-o', '--htmloutput', None),
+        ('-d', '--outputdir', "/tmp/shortread"),
+        ('-f', '--informat', 'fastq'),
+        ('-n', '--namejob', 'rgFastQC'),
+        ('-c', '--contaminants', None),
+        ('-e', '--executable', 'fastqc'),
+    )
+    for short_flag, long_flag, default_value in option_table:
+        parser.add_option(short_flag, long_flag, default=default_value)
+    opts, args = parser.parse_args()
+
+    FastQCRunner(opts).run_fastqc()
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rgFastQC.xml	Fri Sep 12 11:55:50 2014 -0400
@@ -0,0 +1,101 @@
+<tool name="FastQC:Read QC" id="fastqc" version="0.60">
+  <description>reports using FastQC</description>
+  <!-- Runs the rgFastQC.py wrapper with python. The "-c" contaminants argument is
+       only appended when the optional contaminants dataset is set.
+       NOTE(review): JAVA_JAR_PATH is presumably exported by the FastQC package
+       requirement; confirm against the package definition. -->
+  <command interpreter="python">
+    rgFastQC.py -i "$input_file" -d . -o "$html_file" -n "$out_prefix" -f "$input_file.ext" -j "$input_file.name" -e "\$JAVA_JAR_PATH/fastqc"
+#if $contaminants.dataset and str($contaminants) > ''
+-c "$contaminants"
+#end if
+  </command>
+  <!-- The tool depends on the FastQC package, version 0.11.2. -->
+  <requirements>
+    <requirement type="package" version="0.11.2">FastQC</requirement>
+  </requirements>
+  <!-- Inputs: the read file to check, a title for the output (the sanitizer keeps
+       only letters and digits), and an optional tabular contaminant list. -->
+  <inputs>
+    <param format="fastqsanger,fastq,bam,sam" name="input_file" type="data" label="Short read data from your current history" />
+    <param name="out_prefix" value="FastQC" type="text" label="Title for the output file - to remind you what the job was for" size="80"
+      help="Letters and numbers only please - other characters will be removed">
+    <sanitizer invalid_char="">
+        <valid initial="string.letters,string.digits"/>
+    </sanitizer>
+    </param>
+    <param name="contaminants" type="data" format="tabular" optional="true" label="Contaminant list"
+           help="tab delimited file with 2 columns: name and sequence.  For example: Illumina Small RNA RT Primer	CAAGCAGAAGACGGCATACGA"/>
+  </inputs>
+  <!-- A single HTML dataset, labelled from the output title and the input dataset name. -->
+  <outputs>
+    <data format="html" name="html_file"  label="${out_prefix}_${input_file.name}.html" />
+  </outputs>
+  <!-- Functional test: runs the tool on 1000gsample.fastq with a contaminants file
+       and compares the HTML output with fastqc_report.html (lines_diff is 100). -->
+  <tests>
+    <test>
+      <param name="input_file" value="1000gsample.fastq" />
+      <param name="out_prefix" value="fastqc_out" />
+      <param name="contaminants" value="fastqc_contaminants.txt" ftype="tabular" />
+      <output name="html_file" file="fastqc_report.html" ftype="html" lines_diff="100"/>
+    </test>
+  </tests>
+  <help>
+
+.. class:: infomark
+
+**Purpose**
+
+FastQC aims to provide a simple way to do some quality control checks on raw
+sequence data coming from high throughput sequencing pipelines.
+It provides a modular set of analyses which you can use to give a quick
+impression of whether your data has any problems of
+which you should be aware before doing any further analysis.
+
+The main functions of FastQC are:
+
+- Import of data from BAM, SAM or FastQ files (any variant)
+- Providing a quick overview to tell you in which areas there may be problems
+- Summary graphs and tables to quickly assess your data
+- Export of results to an HTML based permanent report
+- Offline operation to allow automated generation of reports without running the interactive application
+
+
+-----
+
+
+.. class:: infomark
+
+**FastQC**
+
+This is a Galaxy wrapper. It merely exposes the external package FastQC_, which is documented on the FastQC_ project website.
+Kindly acknowledge it as well as this tool if you use it.
+FastQC incorporates the Picard-tools_ libraries for sam/bam processing.
+
+The contaminants file parameter was borrowed from the independently developed
+fastqcwrapper contributed to the Galaxy Community Tool Shed by J. Johnson.
+
+-----
+
+.. class:: infomark
+
+**Inputs and outputs**
+
+FastQC_ is the best place to look for documentation - it's very good.
+A summary follows below for those in a tearing hurry.
+
+This wrapper will accept a Galaxy fastq, sam or bam as the input read file to check.
+It will also take an optional file containing a list of contaminants information, in the form of
+a tab-delimited file with 2 columns, name and sequence.
+
+The tool produces a single HTML output file that contains all of the results, including the following:
+
+- Basic Statistics
+- Per base sequence quality
+- Per sequence quality scores
+- Per base sequence content
+- Per base GC content
+- Per sequence GC content
+- Per base N content
+- Sequence Length Distribution
+- Sequence Duplication Levels
+- Overrepresented sequences
+- Kmer Content
+
+All except Basic Statistics and Overrepresented sequences are plots.
+
+.. _FastQC: http://www.bioinformatics.bbsrc.ac.uk/projects/fastqc/
+.. _Picard-tools: http://picard.sourceforge.net/index.shtml
+
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Fri Sep 12 11:55:50 2014 -0400
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<!-- Declares the FastQC 0.11.2 package, provided by the package_fastqc_0_11_2
+     repository (owner tmcgowan) on the test Tool Shed at changeset 9b285cb34c00. -->
+<tool_dependency>
+  <package name="FastQC" version="0.11.2">
+      <repository changeset_revision="9b285cb34c00" name="package_fastqc_0_11_2" owner="tmcgowan" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>