Mercurial > repos > fubar > genomic_association_tester
changeset 0:d8a6a6d67727 draft
initial commit
author | fubar |
---|---|
date | Tue, 27 Aug 2013 23:16:38 -0400 |
parents | |
children | 98db9665b15e |
files | rlGAT/rlGAT.xml rlGAT/tool_dependencies.xml |
diffstat | 2 files changed, 201 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rlGAT/rlGAT.xml Tue Aug 27 23:16:38 2013 -0400
@@ -0,0 +1,191 @@
+<tool id="rlGAT" name="Genomic association tests" version="0.1">
+    <!--
+        Galaxy wrapper around gat-run.py from the GAT package. Each repeat
+        block in the inputs becomes one or more occurrences of the matching
+        gat-run.py file option; whatever gat-run.py writes to stdout is
+        captured as the tabular output and its log file is kept as a second
+        history item.
+    -->
+    <description>using the GAT package</description>
+    <stdio>
+        <!-- A non-zero exit status marks the job as failed. -->
+        <exit_code range="1:" level="fatal" description="gat-run.py exited with a non-zero status"/>
+        <!-- Any other output is only recorded as a warning. -->
+        <regex match=".*" source="both" level="warning" description="stdout/err chatter from gat-run.py"/>
+    </stdio>
+    <requirements>
+        <!-- Requirement names must match the package names in tool_dependencies.xml. -->
+        <requirement type="package" version="1.7.1">numpy</requirement>
+        <requirement type="package" version="0.1">gat</requirement>
+        <requirement type="package" version="1.2.1">matplotliblite</requirement>
+    </requirements>
+    <!--
+        Cheetah template for the command line. Datasets whose extension is
+        reported as 'data' are skipped (presumably this is how an unset
+        dataset shows up here; confirm against the Galaxy version in use).
+    -->
+    <command>
+        gat-run.py ${ignore_segments} --log="$logfile" --num-samples=$numsamp
+        #if str($counter) != 'None':
+            #for $c in str($counter).split(','):
+                --counter=${c}
+            #end for
+        #end if
+        #for $s in $segmentfiles:
+            #if $s.sf.ext != 'data':
+                --segment-file "${s.sf}"
+            #end if
+        #end for
+        #for $s in $workspacefiles:
+            #if $s.wsf.ext != 'data':
+                --workspace-file "${s.wsf}"
+            #end if
+        #end for
+        #for $s in $annotationfiles:
+            #if $s.anno.ext != 'data':
+                --annotation-file "${s.anno}"
+            #end if
+        #end for
+        #for $s in $isochorefiles:
+            #if $s.iso.ext != 'data':
+                --isochore-file "${s.iso}"
+            #end if
+        #end for
+        &gt; ${outfile}
+    </command>
+    <inputs>
+        <param name="title" label="Name for this job's output file" type="text" size="80" value="GAT"/>
+        <param name="numsamp" type="integer" value="1000" size="5"
+               label="Number of simulations - longer takes longer - 1000 is usually a reasonable choice"/>
+        <param name="counter" type="select" multiple="true"
+               label="Counting methods to use - multiple will generate multiple output files"
+               help="Use ctrl to select multiple counters. If in doubt, read the fine manual">
+            <option value="nucleotide-overlap" selected="true">nucleotide-overlap: number of bases overlapping</option>
+            <option value="segment-overlap">segment-overlap: number of intervals in the segments of interest overlapping annotations. A single base-pair overlap is sufficient</option>
+            <option value="segment-mid-overlap">segment-mid-overlap: number of intervals in the segments of interest overlapping annotations at their midpoint</option>
+            <option value="annotations-overlap">annotations-overlap: number of intervals in the annotations overlapping segments of interest. A single base-pair overlap is sufficient.</option>
+            <option value="annotations-mid-overlap">annotations-mid-overlap: number of intervals in the annotations overlapping segments of interest at their midpoint</option>
+        </param>
+
+        <repeat name="workspacefiles" min="1" title="Bed files containing contigs to analyse as the workspace"
+                help="Must have 1 or more. Multiples will be intersected to form the work space of regions for the analysis">
+            <param name="wsf" type="data" format="bed,interval" size="100" optional="false"
+                   label="Workspace (BED) format file(s) from your history"/>
+        </repeat>
+        <repeat name="segmentfiles" min="1" title="Bed files containing segments (features) to analyse"
+                help="Must have 1 or more. Make these the shorter files when choosing between segment and annotation - please read the fine manual">
+            <param name="sf" type="data" format="bed,interval" size="100" optional="false"
+                   label="Segment (BED) format file from your history"/>
+        </repeat>
+        <param name="ignore_segments" type="select" label="Ignore 4th BED column when determining segments (default)"
+               help="Otherwise GAT treats each distinct 4th column name as a distinct segment set for testing">
+            <option value="--ignore-segment-tracks" selected="true">Ignore segment tracks</option>
+            <option value="">Use 4th bed column to determine segments - may use lots'o'ram - see GAT documentation</option>
+        </param>
+        <repeat name="annotationfiles" min="1" title="Bed files containing annotations to analyse for each segment"
+                help="Make these the longer files when choosing between segments and annotations. Please read the fine manual">
+            <param name="anno" type="data" format="bed,interval" size="100" optional="false"
+                   label="Annotation (BED) format file from your history"/>
+        </repeat>
+        <repeat name="isochorefiles" title="Optional BED files containing isochores to split the workspace(s) during simulation"
+                help="Read the fine manual">
+            <param name="iso" type="data" format="bed,interval" size="100" optional="true"
+                   label="Isochore (BED) format file from your history"/>
+        </repeat>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="outfile" label="${title}_gatout.tsv"/>
+        <data format="txt" name="logfile" label="${title}_gatrun.log"/>
+    </outputs>
+    <tests>
+        <!--
+            FIXME: this test was carried over from a different wrapper. Apart
+            from "title", none of its parameters exist in this tool. It needs
+            to be replaced with a GAT test once BED test data is available.
+        -->
+        <test>
+            <param name="feature_type" value="exon"/>
+            <param name="gfffile" value="rn4_chr20_100k.gtf"/>
+            <param name="firstsamf" value="rn4chr20test1.bam" ftype="bam"/>
+            <param name="secondsamf" value="rn4chr20test2.bam" ftype="bam"/>
+            <param name="id_attr" value="gene_name"/>
+            <param name="model" value="union"/>
+            <param name="stranded" value="no"/>
+            <param name="title" value="htseqtest"/>
+            <param name="mapqMin" value="0"/>
+            <output name="outfile" file="htseqsams2mx_test1_out.xls" lines_diff="1"/>
+        </test>
+    </tests>
+    <help>
+
+**What this tool does**
+
+This is a Galaxy wrapper for the GAT package
+
+**For complete GAT package documentation**
+
+please read the fine manual at GAT_
+
+**Brief overview**
+
+For those too lazy to click on GAT_, here's a summary of notes from there:
+
+The gat tool requires the following input:
+
+- A set of intervals S with segments of interest to test
+- A set of intervals A with annotations to test against
+- A set of intervals W describing a workspace
+
+All of the options ``--segment-file``, ``--workspace-file`` and ``--annotation-file`` can be used several times on the command line. What happens with multiple files depends on the file type:
+
+- Multiple ``--segment-file`` entries are added to the list of segments of interest to test with.
+- Multiple ``--annotation-file`` entries are added to the list of annotations to test against.
+- Multiple ``--workspace-file`` entries are intersected to create a single workspace.
+
+Generally, gat will test m segments of interest lists against n annotations lists in all m * n combinations.
+
+Within a bed formatted file, different tracks can be separated using a UCSC formatted track line, such as this::
+
+    track name="segmentset1"
+    chr1 23 100
+    chr3 50 2000
+    track name="segmentset2"
+    chr1 1000 2000
+    chr3 4000 5000
+
+or alternatively, using the fourth column in a bed formatted file::
+
+    chr1 23 100 segmentset1
+    chr3 50 2000 segmentset1
+    chr1 1000 2000 segmentset2
+    chr3 4000 5000 segmentset2
+
+The latter takes precedence. The option ``--ignore-segment-tracks`` forces gat to ignore the fourth column and consider all intervals to be from a single interval set.
+
+**Note** Be careful with bed-files where each interval gets a unique identifier. Gat will interpret each interval as a separate segment set to read.
+This is usually not intended and causes gat to require a very large amount of memory.
+By default, tracks can not be split over multiple files.
+
+**Adding isochores**
+
+Isochores are genomic segments with common properties that are potentially correlated with the segments of interest and the annotations, but the correlation is not of interest here.
+For example, consider a ChIP-Seq experiment and the testing if ChIP-Seq intervals are close to genes.
+G+C rich regions in the genome are gene rich, while at the same time there is possibly a nucleotide composition bias in the ChIP-Seq protocol depleting A+T rich sequence.
+An association between genes and ChIP-Seq intervals might simply be due to the G+C effect. Using isochores can control for this effect to some extent.
+
+Isochores split the workspace into smaller workspaces of similar properties, so called isochore workspaces.
+Simulations are performed for each isochore workspace separately. At the end, results for all isochore workspaces are aggregated.
+
+**Attribution**
+
+GAT itself is the work of its upstream authors - see GAT_ for details.
+
+Otherwise, all code and documentation comprising this tool
+was written by Ross Lazarus and is
+licensed to you under the LGPL_ like other rgenetics artefacts
+
+.. _LGPL: http://www.gnu.org/copyleft/lesser.html
+.. _GAT: http://www.cgat.org/~andreas/documentation/gat/contents.html
+
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rlGAT/tool_dependencies.xml Tue Aug 27 23:16:38 2013 -0400
@@ -0,0 +1,44 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <!-- Prerequisites provided by other tool shed repositories. -->
+    <package name="numpy" version="1.7.1">
+        <repository changeset_revision="af9633757cf0" name="package_numpy_1_7" owner="blankenberg" prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu"/>
+    </package>
+    <package name="matplotliblite" version="1.2.1">
+        <repository changeset_revision="8df6bbf48c3a" name="package_matplotlib_lite" owner="fubar" prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu"/>
+    </package>
+    <!-- GAT itself, built from the upstream source tarball. -->
+    <package name="gat" version="0.1">
+        <install version="1.0">
+            <actions>
+                <!--
+                    Both library directories are created before anything else
+                    runs; the install was observed to fail when they are missing.
+                -->
+                <action type="make_directory">$INSTALL_DIR/lib/python</action>
+                <action type="make_directory">$INSTALL_DIR/lib64/python</action>
+                <!-- Make the numpy and matplotlib environments available while building. -->
+                <action type="set_environment_for_install">
+                    <repository changeset_revision="af9633757cf0" name="package_numpy_1_7" owner="blankenberg" prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu">
+                        <package name="numpy" version="1.7.1"/>
+                    </repository>
+                    <repository changeset_revision="8df6bbf48c3a" name="package_matplotlib_lite" owner="fubar" prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu">
+                        <package name="matplotliblite" version="1.2.1"/>
+                    </repository>
+                </action>
+                <action type="download_by_url">https://genomic-association-tester.googlecode.com/files/genomic-association-tester-0.1.tar.gz</action>
+                <!-- The ampersands are written as entities so the file stays well-formed XML. -->
+                <action type="shell_command">export PYTHONPATH=$PYTHONPATH:$INSTALL_DIR/lib/python:$INSTALL_DIR/lib64/python &amp;&amp; python setup.py install --home $INSTALL_DIR --install-scripts $INSTALL_DIR/bin</action>
+                <!-- Runtime environment for tools that require this package. -->
+                <action type="set_environment">
+                    <environment_variable action="append_to" name="PYTHONPATH">$INSTALL_DIR/lib/python:$INSTALL_DIR/lib64/python</environment_variable>
+                    <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>
+            Installation of GAT requires the Numpy Python package. matplotlib is used for plots so added here
+            Note this uses the matplotlib lite version dependent on the lite version of numpy - no atlas compilation
+        </readme>
+    </package>
+</tool_dependency>