Mercurial > repos > kevyin > homer
changeset 23:4b5b9c36ff6f draft
Uploaded
author | kevyin |
---|---|
date | Sun, 16 Dec 2012 23:31:10 -0500 |
parents | d78b30da2e89 |
children | 4241b517de77 |
files | README annotatePeaks.xml bed2pos.xml findPeaks.xml makeTagDirectory.py makeTagDirectory.xml pos2bed.xml tool_dependencies.xml |
diffstat | 8 files changed, 401 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README Sun Dec 16 23:31:10 2012 -0500 @@ -0,0 +1,13 @@ +Homer wrapper for Galaxy + +Code repo: https://bitbucket.org/gvl/homer + +=========================================: +LICENSE for this wrapper: +=========================================: +Kevin Ying +Garvan Institute: http://www.garvan.org.au +GVL: https://genome.edu.au/wiki/GVL + +http://opensource.org/licenses/mit-license.php +
<!-- annotatePeaks.xml: Galaxy tool wrapper for HOMER's annotatePeaks.pl. -->
<tool id="homer_annotatePeaks" name="homer_annotatePeaks" version="0.0.3">
    <requirements>
        <requirement type="package">homer</requirement>
    </requirements>
    <description></description>
    <!--<version_command></version_command>-->
    <!--
        stdout of annotatePeaks.pl becomes the annotation dataset and stderr
        the log dataset. The shell redirections are entity-escaped because a
        literal ampersand is not allowed in XML character data.
    -->
    <command>
        annotatePeaks.pl $input_bed $genome_selector 1&gt; $out_annotated
        2&gt; $out_log || echo "Error running annotatePeaks." &gt;&amp;2
    </command>
    <inputs>
        <param format="tabular,bed" name="input_bed" type="data" label="Homer peak positions or BED format" />
        <!-- Only hg19 is offered; it is also the only genome installed by tool_dependencies.xml. -->
        <param name="genome_selector" type="select" label="Genome version">
            <option value="hg19" selected="true">hg19</option>
        </param>
    </inputs>
    <outputs>
        <!--<data format="html" name="html_outfile" label="index" />-->
        <!--<data format="html" hidden="True" name="html_outfile" label="index.html" />-->
        <!-- annotatePeaks.pl writes tab-delimited text, so the dataset is declared tabular rather than csv. -->
        <data format="tabular" name="out_annotated" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#_genome_${genome_selector}" />
        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#_genome_${genome_selector}.log" />
    </outputs>
    <tests>
        <test>
            <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
            <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
        </test>
    </tests>

    <help>

.. class:: infomark

**Homer annotatePeaks**
More information on accepted formats
http://biowhat.ucsd.edu/homer/ngs/annotation.html

    </help>
</tool>
<!-- bed2pos.xml: Galaxy tool wrapper for HOMER's bed2pos.pl (BED to HOMER peak/position format). -->
<tool id="homer_bed2pos" name="homer_bed2pos" version="0.0.2">
    <requirements>
        <requirement type="package">homer</requirement>
    </requirements>
    <description></description>
    <!--<version_command></version_command>-->
    <!--
        stdout of bed2pos.pl becomes the position dataset and stderr the log
        dataset. The shell redirections are entity-escaped because a literal
        ampersand is not allowed in XML character data.
    -->
    <command>
        bed2pos.pl $input_bed 1&gt; $out_pos
        2&gt; $out_log || echo "Error running bed2pos." &gt;&amp;2
    </command>
    <inputs>
        <param format="tabular,bed" name="input_bed" type="data" label="BED file" />
    </inputs>
    <outputs>
        <!--<data format="html" name="html_outfile" label="index" />-->
        <!--<data format="html" hidden="True" name="html_outfile" label="index.html" />-->
        <data format="tabular" name="out_pos" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#" />
        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#.log" />
    </outputs>
    <tests>
        <test>
            <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
            <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
        </test>
    </tests>

    <help>
.. class:: infomark

**Homer bed2pos.pl**
http://biowhat.ucsd.edu/homer/ngs/miscellaneous.html
    </help>
</tool>
<!-- findPeaks.xml: Galaxy tool wrapper for HOMER's findPeaks peak caller. -->
<tool id="homer_findPeaks" name="homer_findPeaks" version="0.1.1">
    <requirements>
        <requirement type="package">homer</requirement>
    </requirements>
    <description>Homer's peakcaller. Requires tag directories (see makeTagDirectory)</description>
    <!--<version_command></version_command>-->
    <!--
        Peaks are written to the file given with -o; when a control tag
        directory is selected it is passed with -i. stderr goes to the log
        dataset. The shell redirections are entity-escaped because a literal
        ampersand is not allowed in XML character data.
    -->
    <command>
        findPeaks $tagDir.extra_files_path $options -o $outputPeakFile

        #if $control_tagDir:
        -i $control_tagDir.extra_files_path
        #end if

        2&gt; $out_log || echo "Error running findPeaks." &gt;&amp;2
    </command>
    <inputs>
        <!--
            NOTE(review): these inputs require the "homerTagDirectory" datatype,
            while makeTagDirectory.xml in this changeset declares its output as
            "html" (the homerTagDirectory variant is commented out there).
            Confirm the datatype is registered and that the two tools agree.
        -->
        <param format="homerTagDirectory" name="tagDir" type="data" label="tag directory" />
        <param format="homerTagDirectory" name="control_tagDir" type="data" optional="True" label="Control tag directory" />
        <param type="text" name="options" label="Extra options" value="" help="See the list of findPeaks options in the help section below">
            <!--
                $options is inserted unquoted into the shell command line, so
                characters the shell would interpret are removed from the
                valid set in addition to the single quote and the slash.
            -->
            <sanitizer>
                <valid initial="string.printable">
                    <remove value="'"/>
                    <remove value="/"/>
                    <remove value=";"/>
                    <remove value="&amp;"/>
                    <remove value="|"/>
                    <remove value="`"/>
                    <remove value="$"/>
                    <remove value="&lt;"/>
                    <remove value="&gt;"/>
                    <remove value="\"/>
                    <remove value="&quot;"/>
                    <remove value="("/>
                    <remove value=")"/>
                    <remove value="&#10;"/>
                    <remove value="&#13;"/>
                </valid>
                <mapping initial="none">
                    <add source="'" target="__sq__"/>
                </mapping>
            </sanitizer>
        </param>
    </inputs>
    <outputs>
        <!--<data format="html" name="html_outfile" label="index" />-->
        <!--<data format="html" hidden="True" name="html_outfile" label="index.html" />-->
        <data format="txt" name="outputPeakFile" label="${tool.name} on #echo os.path.splitext(str($tagDir.name))[0]#.txt" />
        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($tagDir.name))[0]#.log" />
    </outputs>
    <tests>
        <test>
            <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
            <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
        </test>
    </tests>

    <!-- The help is wrapped in CDATA because the usage text is full of literal angle brackets. -->
    <help><![CDATA[

.. class:: infomark

**Homer findPeaks**
http://biowhat.ucsd.edu/homer/ngs/peaks.html

::

    Usage: findPeaks <tag directory> [options]

    Finds peaks in the provided tag directory. By default, peak list printed to stdout

    General analysis options:
        -o <filename|auto> (file name for output peaks, default: stdout)
            "-o auto" will send output to "<tag directory>/peaks.txt", ".../regions.txt",
            or ".../transcripts.txt" depending on the "-style" option
        -style <option> (Specialized options for specific analysis strategies)
            factor (transcription factor ChIP-Seq, uses -center, output: peaks.txt, default)
            histone (histone modification ChIP-Seq, region based, uses -region -size 500 -L 0, regions.txt)
            groseq (de novo transcript identification from GroSeq data, transcripts.txt)
            tss (TSS identification from 5' RNA sequencing, tss.txt)
            dnase (Hypersensitivity [crawford style (nicking)], peaks.txt)

    chipseq/histone options:
        -i <input tag directory> (Experiment to use as IgG/Input/Control)
        -size <#> (Peak size, default: auto)
        -minDist <#> (minimum distance between peaks, default: peak size x2)
        -gsize <#> (Set effective mappable genome size, default: 2e9)
        -fragLength <#|auto> (Approximate fragment length, default: auto)
        -inputFragLength <#|auto> (Approximate fragment length of input tags, default: auto)
        -tbp <#> (Maximum tags per bp to count, 0 = no limit, default: auto)
        -inputtbp <#> (Maximum tags per bp to count in input, 0 = no limit, default: auto)
        -strand <both|separate> (find peaks using tags on both strands or separate, default:both)
        -norm # (Tag count to normalize to, default 10000000)
        -region (extends start/stop coordinates to cover full region considered "enriched")
        -center (Centers peaks on maximum tag overlap and calculates focus ratios)
        -nfr (Centers peaks on most likely nucleosome free region [works best with mnase data])
            (-center and -nfr can be performed later with "getPeakTags")

    Peak Filtering options: (set -F/-L/-C to 0 to skip)
        -F <#> (fold enrichment over input tag count, default: 4.0)
        -P <#> (poisson p-value threshold relative to input tag count, default: 0.0001)
        -L <#> (fold enrichment over local tag count, default: 4.0)
        -LP <#> (poisson p-value threshold relative to local tag count, default: 0.0001)
        -C <#> (fold enrichment limit of expected unique tag positions, default: 2.0)
        -localSize <#> (region to check for local tag enrichment, default: 10000)
        -inputSize <#> (Size of region to search for control tags, default: 2x peak size)
        -fdr <#> (False discovery rate, default = 0.001)
        -poisson <#> (Set poisson p-value cutoff, default: uses fdr)
        -tagThreshold <#> (Set # of tags to define a peak, default: 25)
        -ntagThreshold <#> (Set # of normalized tags to define a peak, by default uses 1e7 for norm)
        -minTagThreshold <#> (Absolute minimum tags per peak, default: expected tags per peak)

    GroSeq Options: (Need to specify "-style groseq"):
        -tssSize <#> (size of region for initiation detection/artifact size, default: 250)
        -minBodySize <#> (size of region for transcript body detection, default: 1000)
        -maxBodySize <#> (size of region for transcript body detection, default: 10000)
        -tssFold <#> (fold enrichment for new initiation detection, default: 4.0)
        -bodyFold <#> (fold enrichment for new transcript detection, default: 4.0)
        -endFold <#> (end transcript when levels are this much less than the start, default: 10.0)
        -fragLength <#> (Approximate fragment length, default: 150)
        -uniqmap <directory> (directory of binary files specifying uniquely mappable locations)
            Download from http://biowhat.ucsd.edu/homer/groseq/
        -confPvalue <#> (confidence p-value: 1.00e-05)
        -minReadDepth <#> (Minimum initial read depth for transcripts, default: auto)
        -pseudoCount <#> (Pseudo tag count, default: 2.0)
        -gtf <filename> (Output de novo transcripts in GTF format)
            "-o auto" will produce <dir>/transcripts.txt and <dir>/transcripts.gtf

    ]]></help>
</tool>
"""
makeTagDirectory.py - Galaxy helper that runs HOMER's makeTagDirectory.

Invocation (see makeTagDirectory.xml):

    makeTagDirectory.py <output dir> <alignment file> [...] -f <format> -o <html file>

The positional arguments are handed to the executable unchanged, "-format
<format>" is appended unless the format is "bam", and an HTML fragment listing
the files found in the output directory is written to the -o file.
"""
import re
import os
import sys
import subprocess
import optparse
import shutil
import tempfile


def getFileString(fpath, outpath):
    """
    Format a file name together with a human-readable size.

    fpath   -- file name, relative to outpath
    outpath -- directory that contains the file

    Returns '<fpath> <size>' with the size given in MB, KB or B (the size part
    is empty for a zero-length file), or the placeholder '? ?' when the path
    is not a regular file.
    """
    fp = os.path.join(outpath, fpath)
    if not os.path.isfile(fp):
        return '? ?'
    size = ''
    n = float(os.path.getsize(fp))
    if n > 2**20:
        size = ' (%1.1f MB)' % (n/2**20)
    elif n > 2**10:
        size = ' (%1.1f KB)' % (n/2**10)
    elif n > 0:
        size = ' (%d B)' % (int(n))
    return '%s %s' % (fpath, size)


class makeTagDirectory():
    """Wrapper around the makeTagDirectory executable.

    opts -- parsed options providing .executable and .format
    args -- positional arguments: output directory first, then alignment files
    """

    def __init__(self, opts=None, args=None):
        self.opts = opts
        self.args = args

    def run_makeTagDirectory(self):
        """
        Run: makeTagDirectory <Output Directory Name> [options] <alignment file1> [alignment file 2]

        Returns (html, retval): the list of HTML fragments from gen_html() for
        the output directory, and the exit status of the executable.
        Raises ValueError when no output directory was supplied.
        """
        if not self.args:
            raise ValueError('makeTagDirectory needs an output directory as its first argument')
        # Use the instance's own arguments; relying on a module-level "args"
        # only worked when this file was executed as a script.
        cl = [self.opts.executable] + list(self.args)
        if self.opts.format != "bam":
            cl += ["-format", self.opts.format]
        print(cl)
        sys.stdout.flush()  # keep our output ahead of the child's output
        retval = subprocess.call(cl)

        html = self.gen_html(self.args[0])
        return html, retval

    def gen_html(self, dr=None):
        """
        Build an HTML listing of the regular files in directory dr.

        dr -- directory to list; defaults to the working directory at call time

        Returns a list of HTML fragments. A directory that does not exist
        (for example because the executable failed before creating it) yields
        an empty table instead of an exception.
        """
        if dr is None:
            dr = os.getcwd()
        flist = sorted(os.listdir(dr)) if os.path.isdir(dr) else []
        print(flist)
        res = ['<div class="module"><h2>Files created by makeTagDirectory</h2><table cellspacing="2" cellpadding="2">\n']

        for fn in flist:
            # Test the entry inside dr, not relative to the working directory.
            if not os.path.isdir(os.path.join(dr, fn)):
                res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn, getFileString(fn, dr)))

        res.append('</table></div>\n')

        return res


if __name__ == '__main__':
    op = optparse.OptionParser()
    op.add_option('-e', '--executable', default='makeTagDirectory')
    op.add_option('-o', '--htmloutput', default=None)
    op.add_option('-f', '--format', default="sam")
    opts, args = op.parse_args()
    if not args:
        op.error('an output directory is required as the first argument')
    if opts.htmloutput is None:
        op.error('-o/--htmloutput is required')

    wrapper = makeTagDirectory(opts, args)
    html, retval = wrapper.run_makeTagDirectory()
    with open(opts.htmloutput, 'w') as out:
        out.write(''.join(html))
    if retval != 0:
        # indicate failure on stderr and through the exit status
        sys.stderr.write('%s exited with status %d\n' % (opts.executable, retval))
        sys.exit(1)
<!-- makeTagDirectory.xml: Galaxy tool wrapper that runs makeTagDirectory.py to build a HOMER tag directory. -->
<tool id="homer_makeTagDirectory" name="homer_makeTagDirectory" version="1.0.0">
    <requirements>
        <requirement type="package" >homer</requirement>
    </requirements>
    <description>Simple wrapper for makeTagDirectory. Used by findPeaks</description>
    <!--<version_command></version_command>-->
    <!--
        The tag directory is created in the extra-files path of the "tagDir"
        output; the HTML listing produced by makeTagDirectory.py is written to
        the dataset itself via -o. Each alignment file is followed by
        "-f <its extension>". The shell redirections are entity-escaped because
        a literal ampersand is not allowed in XML character data.
    -->
    <command interpreter="python">makeTagDirectory.py ${tagDir.files_path}
        #for $alignF in $alignmentFiles
        $alignF.file -f $alignF.file.ext
        #end for
        -o $tagDir
        2&gt; $out_log || echo "Error running homer_makeTagDirectory." &gt;&amp;2

    </command>
    <inputs>
        <!-- Text parameters take their initial value from "value"; the former "default" attribute had no effect. -->
        <param name="title" label="Name for the output tag directory" type="text" value="Homer TagDirectory" />
        <repeat name="alignmentFiles" title="Alignment Files">
            <param name="file" label="Add file" type="data" format="sam,bed" />
        </repeat>
    </inputs>
    <outputs>
        <!--
            NOTE(review): findPeaks.xml in this changeset expects inputs of
            format "homerTagDirectory", but the output below is declared as
            "html". Confirm which datatype is intended.
        -->
        <!--<data format="homerTagDirectory" name="tagDir" label="${title} tag directory" />-->
        <data format="html" name="tagDir" label="${title} tag directory" />
        <data format="txt" name="out_log" label="${title}.log" />
        <!--<data format="html" name="html_outfile" label="index" />-->
        <!--<data format="html" hidden="True" name="html_outfile" label="index.html" />-->
    </outputs>


    <tests>
        <!--<test>-->
            <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
            <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
        <!--</test>-->
    </tests>

    <help>

.. class:: infomark

**Homer makeTagDirectory**

Creates a HOMER tag directory from the selected alignment files. The output
dataset is an HTML page listing the files created in the tag directory; the
tag directory is the input expected by homer_findPeaks.

http://biowhat.ucsd.edu/homer/ngs/tagDir.html

    </help>
</tool>
<!-- pos2bed.xml: Galaxy tool wrapper for HOMER's pos2bed.pl (HOMER peak/position format to BED). -->
<tool id="homer_pos2bed" name="homer_pos2bed" version="0.0.2">
    <requirements>
        <requirement type="package">homer</requirement>
    </requirements>
    <description></description>
    <!--<version_command></version_command>-->
    <!--
        stdout of pos2bed.pl becomes the BED dataset and stderr the log
        dataset. The shell redirections are entity-escaped because a literal
        ampersand is not allowed in XML character data.
    -->
    <command>
        pos2bed.pl $input_peak 1&gt; $out_bed
        2&gt; $out_log || echo "Error running pos2bed." &gt;&amp;2
    </command>
    <inputs>
        <param format="tabular" name="input_peak" type="data" label="Homer peak positions" />
    </inputs>
    <outputs>
        <!--<data format="html" name="html_outfile" label="index" />-->
        <!--<data format="html" hidden="True" name="html_outfile" label="index.html" />-->
        <data format="bed" name="out_bed" label="${tool.name} on #echo os.path.splitext(str($input_peak.name))[0]#.bed" />
        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($input_peak.name))[0]#.log" />
    </outputs>
    <tests>
        <test>
            <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
            <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
        </test>
    </tests>

    <help>
.. class:: infomark

**Homer pos2bed.pl**
http://biowhat.ucsd.edu/homer/ngs/miscellaneous.html
    </help>
</tool>
<?xml version="1.0"?>
<!-- tool_dependencies.xml: installs the "homer" package required by the HOMER tool wrappers. -->
<tool_dependency>
    <package name="homer">
        <install version="1.0">
            <actions>
                <!-- Fetch HOMER's configuration script. -->
                <action type="download_by_url">http://biowhat.ucsd.edu/homer/configureHomer.pl</action>
                <!-- First invocation installs HOMER itself, the second adds the hg19 genome package. -->
                <action type="shell_command">perl ./configureHomer.pl -install</action>
                <action type="shell_command">perl ./configureHomer.pl -install hg19</action>
                <!-- Put the package's bin directory at the front of PATH for dependent tools. -->
                <action type="set_environment">
                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
                </action>
            </actions>
        </install>
        <readme>
            Installs homer
        </readme>
    </package>
</tool_dependency>