# HG changeset patch # User jbrayet # Date 1455111417 18000 # Node ID ad2fdf5afa67316abf15baba65fb0c9523ba084a # Parent 925b295d41b81699422f14d197c4a82f0361f7b9 Uploaded diff -r 925b295d41b8 -r ad2fdf5afa67 rgFastQC.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rgFastQC.xml Wed Feb 10 08:36:57 2016 -0500 @@ -0,0 +1,121 @@ + + Read Quality reports + + institutcuriengsintegration/fastqc:0.11.4 + + + + + + + + + rgFastQC.py + -i "$input_file" + -d "$html_file.files_path" + -o "$html_file" + -t "$text_file" + -f "$input_file.ext" + -j "$input_file.name" + -e "/usr/bin/fastqc/FastQC/fastqc" + #if $contaminants.dataset and str($contaminants) > '' + -c "$contaminants" + #end if + #if $limits.dataset and str($limits) > '' + -l "$limits" + #end if + + + + + + + + + + + + + +.. class:: infomark + +**Purpose** + +FastQC aims to provide a simple way to do some quality control checks on raw +sequence data coming from high throughput sequencing pipelines. +It provides a modular set of analyses which you can use to give a quick +impression of whether your data has any problems of +which you should be aware before doing any further analysis. + +The main functions of FastQC are: + +- Import of data from BAM, SAM or FastQ files (any variant) +- Providing a quick overview to tell you in which areas there may be problems +- Summary graphs and tables to quickly assess your data +- Export of results to an HTML based permanent report +- Offline operation to allow automated generation of reports without running the interactive application + + +----- + + +.. class:: infomark + +**FastQC** + +This is a Galaxy wrapper. It merely exposes the external package FastQC_ which is documented at FastQC_ +Kindly acknowledge it as well as this tool if you use it. +FastQC incorporates the Picard-tools_ libraries for sam/bam processing. + +The contaminants file parameter was borrowed from the independently developed +fastqcwrapper contributed to the Galaxy Community Tool Shed by J. Johnson. 
+Adaptation to version 0.11.2 by T. McGowan. + +----- + +.. class:: infomark + +**Inputs and outputs** + +FastQC_ is the best place to look for documentation - it's very good. +A summary follows below for those in a tearing hurry. + +This wrapper will accept a Galaxy fastq, sam or bam as the input read file to check. +It will also take an optional file containing a list of contaminants, in the form of +a tab-delimited file with 2 columns, name and sequence. As another option, the tool takes a custom +limits.txt file that allows setting the warning thresholds for the different modules and also specifies +which modules to include in the output. + +The tool produces a basic text file and an HTML output file that contain all of the results, including the following: + +- Basic Statistics +- Per base sequence quality +- Per sequence quality scores +- Per base sequence content +- Per base GC content +- Per sequence GC content +- Per base N content +- Sequence Length Distribution +- Sequence Duplication Levels +- Overrepresented sequences +- Kmer Content + +All except Basic Statistics and Overrepresented sequences are plots. + .. _FastQC: http://www.bioinformatics.bbsrc.ac.uk/projects/fastqc/ + .. _Picard-tools: http://picard.sourceforge.net/index.shtml + + + + + @ARTICLE{andrews_s, + author = {Andrews, S.}, + keywords = {bioinformatics, ngs, qc}, + priority = {2}, + title = {{FastQC A Quality Control tool for High Throughput Sequence Data}}, + url = {http://www.bioinformatics.babraham.ac.uk/projects/fastqc/} + } + + +