Mercurial > repos > kevyin > homer

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README	Sun Dec 16 23:31:10 2012 -0500
@@ -0,0 +1,13 @@
+Homer wrapper for Galaxy
+
+Code repo: https://bitbucket.org/gvl/homer
+
+=========================================:
+LICENSE for this wrapper:
+=========================================:
+Kevin Ying
+Garvan Institute: http://www.garvan.org.au
+GVL: https://genome.edu.au/wiki/GVL
+
+http://opensource.org/licenses/mit-license.php
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/annotatePeaks.xml	Sun Dec 16 23:31:10 2012 -0500
@@ -0,0 +1,41 @@
+<tool id="homer_annotatePeaks" name="homer_annotatePeaks" version="0.0.3">
+    <requirements>
+        <requirement type="package">homer</requirement>
+    </requirements>
+    <description>Annotate peaks with Homer's annotatePeaks.pl</description>
+    <!-- No version_command is defined for this wrapper. -->
+    <command>
+        annotatePeaks.pl $input_bed $genome_selector 1&gt; $out_annotated
+        2&gt; $out_log || echo "Error running annotatePeaks." >&amp;2
+    </command>
+    <inputs>
+        <param format="tabular,bed" name="input_bed" type="data" label="Homer peak positions or BED format" />
+        <param name="genome_selector" type="select" label="Genome version">
+            <option value="hg19" selected="true">hg19</option>
+        </param>
+    </inputs>
+    <outputs>
+        <!-- annotatePeaks.pl writes tab-delimited text, so the annotation is typed as tabular. -->
+        <!-- out_log receives whatever the script prints on stderr. -->
+        <data format="tabular" name="out_annotated" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#_genome_${genome_selector}" />
+        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#_genome_${genome_selector}.log" />
+    </outputs>
+    <tests>
+        <!--<test>-->
+            <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
+            <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
+        <!--</test>-->
+    </tests>
+
+    <help>
+
+        .. class:: infomark
+
+        **Homer annotatePeaks**
+        More information on accepted formats
+        http://biowhat.ucsd.edu/homer/ngs/annotation.html
+
+
+    </help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bed2pos.xml	Sun Dec 16 23:31:10 2012 -0500
@@ -0,0 +1,34 @@
+<tool id="homer_bed2pos" name="homer_bed2pos" version="0.0.2">
+    <requirements>
+        <requirement type="package">homer</requirement>
+    </requirements>
+    <description>Convert a BED file to Homer peak positions (bed2pos.pl)</description>
+    <!-- No version_command is defined for this wrapper. -->
+    <command>
+        bed2pos.pl $input_bed 1&gt; $out_pos
+        2&gt; $out_log || echo "Error running bed2pos." >&amp;2
+    </command>
+    <inputs>
+        <param format="tabular,bed" name="input_bed" type="data" label="BED file" />
+    </inputs>
+    <outputs>
+        <!-- out_pos receives the script's stdout. -->
+        <!-- out_log receives whatever the script prints on stderr. -->
+        <data format="tabular" name="out_pos" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#" />
+        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#.log" />
+    </outputs>
+    <tests>
+        <!--<test>-->
+            <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
+            <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
+        <!--</test>-->
+    </tests>
+
+    <help>
+        .. class:: infomark
+
+        **Homer bed2pos.pl**
+        http://biowhat.ucsd.edu/homer/ngs/miscellaneous.html
+    </help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/findPeaks.xml	Sun Dec 16 23:31:10 2012 -0500
@@ -0,0 +1,116 @@
+<tool id="homer_findPeaks" name="homer_findPeaks" version="0.1.1">
+    <requirements>
+        <requirement type="package">homer</requirement>
+    </requirements>
+    <description>Homer's peakcaller. Requires tag directories (see makeTagDirectory)</description>
+    <!--<version_command></version_command>-->
+    <command>
+        findPeaks $tagDir.extra_files_path $options -o $outputPeakFile
+        ## The control tag directory is passed with -i only when one was selected.
+    #if $control_tagDir:
+        -i $control_tagDir.extra_files_path
+    #end if
+        ## stderr of findPeaks goes to the log dataset; on failure a message is echoed to stderr.
+        2&gt; $out_log || echo "Error running findPeaks." >&amp;2
+    </command>
+    <inputs>
+        <param format="homerTagDirectory" name="tagDir" type="data" label="tag directory" />
+        <param format="homerTagDirectory" name="control_tagDir" type="data" optional="True" label="Control tag directory" />
+        <param type="text" name="options" label="Extra options" value="" help="Additional findPeaks options, e.g. -style histone -size 500 (see the option list in the help below)">
+          <sanitizer> <!-- options is inserted into the shell command: allow only letters, digits, space and - . _ -->
+            <valid initial="string.letters,string.digits">
+             <add value=" "/>
+             <add value="-"/>
+             <add value="."/>
+             <add value="_"/>
+            </valid>
+            <mapping initial="none"><add source="&apos;" target="__sq__"/></mapping>
+          </sanitizer>
+        </param>
+    </inputs>
+    <outputs>
+        <!--<data format="html" name="html_outfile" label="index" />-->
+        <!--<data format="html" hidden="True" name="html_outfile" label="index.html" />-->
+        <data format="txt" name="outputPeakFile" label="${tool.name} on #echo os.path.splitext(str($tagDir.name))[0]#.txt" />
+        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($tagDir.name))[0]#.log" />
+    </outputs>
+    <tests>
+        <test>
+            <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
+            <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
+        </test>
+    </tests>
+
+    <help>
+
+  .. class:: infomark
+
+  **Homer findPeaks**
+  http://biowhat.ucsd.edu/homer/ngs/peaks.html
+
+	Usage: findPeaks &lt;tag directory&gt; [options]
+
+	Finds peaks in the provided tag directory.  By default, peak list printed to stdout
+
+	General analysis options:
+		-o &lt;filename|auto&gt; (file name to output peaks to, default: stdout)
+			&quot;-o auto&quot; will send output to &quot;&lt;tag directory&gt;/peaks.txt&quot;, &quot;.../regions.txt&quot;,
+			or &quot;.../transcripts.txt&quot; depending on the &quot;-style&quot; option
+		-style &lt;option&gt; (Specialized options for specific analysis strategies)
+			factor (transcription factor ChIP-Seq, uses -center, output: peaks.txt,  default)
+			histone (histone modification ChIP-Seq, region based, uses -region -size 500 -L 0, regions.txt)
+			groseq (de novo transcript identification from GroSeq data, transcripts.txt)
+			tss (TSS identification from 5&apos; RNA sequencing, tss.txt)
+			dnase (Hypersensitivity [crawford style (nicking)], peaks.txt)
+
+	chipseq/histone options:
+		-i &lt;input tag directory&gt; (Experiment to use as IgG/Input/Control)
+		-size &lt;#&gt; (Peak size, default: auto)
+		-minDist &lt;#&gt; (minimum distance between peaks, default: peak size x2)
+		-gsize &lt;#&gt; (Set effective mappable genome size, default: 2e9)
+		-fragLength &lt;#|auto&gt; (Approximate fragment length, default: auto)
+		-inputFragLength &lt;#|auto&gt; (Approximate fragment length of input tags, default: auto)
+		-tbp &lt;#&gt; (Maximum tags per bp to count, 0 = no limit, default: auto)
+		-inputtbp &lt;#&gt; (Maximum tags per bp to count in input, 0 = no limit, default: auto)
+		-strand &lt;both|separate&gt; (find peaks using tags on both strands or separate, default:both)
+		-norm # (Tag count to normalize to, default 10000000)
+		-region (extends start/stop coordinates to cover full region considered &quot;enriched&quot;)
+		-center (Centers peaks on maximum tag overlap and calculates focus ratios)
+		-nfr (Centers peaks on most likely nucleosome free region [works best with mnase data])
+			(-center and -nfr can be performed later with &quot;getPeakTags&quot;)
+
+	Peak Filtering options: (set -F/-L/-C to 0 to skip)
+		-F &lt;#&gt; (fold enrichment over input tag count, default: 4.0)
+		  -P &lt;#&gt; (poisson p-value threshold relative to input tag count, default: 0.0001)
+		-L &lt;#&gt; (fold enrichment over local tag count, default: 4.0)
+		  -LP &lt;#&gt; (poisson p-value threshold relative to local tag count, default: 0.0001)
+		-C &lt;#&gt; (fold enrichment limit of expected unique tag positions, default: 2.0)
+		-localSize &lt;#&gt; (region to check for local tag enrichment, default: 10000)
+		-inputSize &lt;#&gt; (Size of region to search for control tags, default: 2x peak size)
+		-fdr &lt;#&gt; (False discovery rate, default = 0.001)
+		-poisson &lt;#&gt; (Set poisson p-value cutoff, default: uses fdr)
+		-tagThreshold &lt;#&gt; (Set # of tags to define a peak, default: 25)
+		-ntagThreshold &lt;#&gt; (Set # of normalized tags to define a peak, by default uses 1e7 for norm)
+		-minTagThreshold &lt;#&gt; (Absolute minimum tags per peak, default: expected tags per peak)
+
+	GroSeq Options: (Need to specify &quot;-style groseq&quot;):
+		-tssSize &lt;#&gt; (size of region for initiation detection/artifact size, default: 250)
+		-minBodySize &lt;#&gt; (size of region for transcript body detection, default: 1000)
+		-maxBodySize &lt;#&gt; (size of region for transcript body detection, default: 10000)
+		-tssFold &lt;#&gt; (fold enrichment for new initiation detection, default: 4.0)
+		-bodyFold &lt;#&gt; (fold enrichment for new transcript detection, default: 4.0)
+		-endFold &lt;#&gt; (end transcript when levels are this much less than the start, default: 10.0)
+		-fragLength &lt;#&gt; (Approximate fragment length, default: 150)
+		-uniqmap &lt;directory&gt; (directory of binary files specifying uniquely mappable locations)
+			Download from http://biowhat.ucsd.edu/homer/groseq/
+		-confPvalue &lt;#&gt; (confidence p-value: 1.00e-05)
+		-minReadDepth &lt;#&gt; (Minimum initial read depth for transcripts, default: auto)
+		-pseudoCount &lt;#&gt; (Pseudo tag count, default: 2.0)
+		-gtf &lt;filename&gt; (Output de novo transcripts in GTF format)
+			&quot;-o auto&quot; will produce &lt;dir&gt;/transcripts.txt and &lt;dir&gt;/transcripts.gtf
+
+
+
+    </help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/makeTagDirectory.py	Sun Dec 16 23:31:10 2012 -0500
@@ -0,0 +1,94 @@
+"""
+
+
+"""
+import re
+import os
+import sys
+import subprocess
+import optparse
+import shutil
+import tempfile
+
+def getFileString(fpath, outpath):
+    """Return fpath plus a human-readable size, or '? ?' when outpath/fpath is not a file."""
+    full_path = os.path.join(outpath, fpath)
+    if not os.path.isfile(full_path):
+        return '? ?'
+    nbytes = float(os.path.getsize(full_path))
+    # Strict '>' comparisons: exactly 2**20 bytes is reported in KB, exactly 2**10 in B.
+    if nbytes > 2**20:
+        suffix = ' (%1.1f MB)' % (nbytes/2**20)
+    elif nbytes > 2**10:
+        suffix = ' (%1.1f KB)' % (nbytes/2**10)
+    elif nbytes > 0:
+        suffix = ' (%d B)' % (int(nbytes))
+    else:
+        # Empty file: no size suffix, which leaves a trailing space after the name.
+        suffix = ''
+    return '%s %s' % (fpath, suffix)
+
+class makeTagDirectory(object):
+    """Wrapper that runs HOMER's makeTagDirectory and lists the files it wrote as HTML.
+
+    opts must provide ``executable`` and ``format``; args is the positional
+    argument list, whose first item is the output tag directory.
+    """
+
+    def __init__(self,opts=None, args=None):
+        self.opts = opts
+        self.args = args
+
+    def run_makeTagDirectory(self):
+        """Run: makeTagDirectory <Output Directory Name> [options] <alignment file1> [alignment file 2]
+
+        Returns a tuple (html, retval): the list of HTML fragments produced by
+        gen_html() for the output directory, and the exit status of the command.
+        """
+        # Use the arguments stored on the instance, not a module-level global.
+        cl = [self.opts.executable] + list(self.args)
+        if self.opts.format != "bam":
+            cl += ["-format", self.opts.format]
+        print(cl)
+        retval = subprocess.Popen(cl).wait()
+        return self.gen_html(self.args[0]), retval
+
+    def gen_html(self, dr=None):
+        """Return HTML fragments (a list of strings) linking every regular file in dr.
+
+        dr defaults to the working directory at call time; a directory that
+        does not exist yields an empty table.
+        """
+        if dr is None:
+            dr = os.getcwd()
+        flist = sorted(os.listdir(dr)) if os.path.isdir(dr) else []
+        print(flist)
+        res = ['<div class="module"><h2>Files created by makeTagDirectory</h2><table cellspacing="2" cellpadding="2">\n']
+        for fn in flist:
+            # Test entries relative to dr; a bare name would be resolved against the cwd.
+            if not os.path.isdir(os.path.join(dr, fn)):
+                res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn,getFileString(fn, dr)))
+        res.append('</table></div>\n')
+        return res
+
+if __name__ == '__main__':
+    # Usage: makeTagDirectory.py <tag dir> [alignment files ...] -o <html file> [-f <format>] [-e <executable>]
+    op = optparse.OptionParser()
+    op.add_option('-e', '--executable', default='makeTagDirectory')
+    op.add_option('-o', '--htmloutput', default=None)
+    op.add_option('-f', '--format', default="sam")
+    opts, args = op.parse_args()
+    if not args or opts.htmloutput is None:
+        # Stop with a usage error rather than an IndexError/TypeError further down.
+        op.error('an output tag directory argument and -o/--htmloutput are required')
+    wrapper = makeTagDirectory(opts, args)
+    html,retval = wrapper.run_makeTagDirectory()
+    with open(opts.htmloutput, 'w') as out:
+        out.write(''.join(html))
+    if retval != 0:
+        # Report the failure on stderr and exit non-zero so the caller can detect it.
+        sys.stderr.write('makeTagDirectory exited with status %s\n' % retval)
+        sys.exit(1)
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/makeTagDirectory.xml	Sun Dec 16 23:31:10 2012 -0500
@@ -0,0 +1,50 @@
+<tool id="homer_makeTagDirectory" name="homer_makeTagDirectory" version="1.0.0">
+    <requirements>
+        <requirement type="package" >homer</requirement>
+    </requirements>
+    <description>Simple wrapper for makeTagDirectory. Used by findPeaks</description>
+    <!-- NOTE: -f is passed once per alignment file; makeTagDirectory.py keeps only the last value, so all files should share one format. -->
+    <command interpreter="python">makeTagDirectory.py ${tagDir.files_path}
+        #for $alignF in $alignmentFiles
+          $alignF.file -f $alignF.file.ext
+        #end for
+          -o $tagDir
+        2&gt; $out_log || echo "Error running homer_makeTagDirectory." >&amp;2
+
+    </command>
+    <inputs>
+        <param name="title" label="Name for the output tag directory" type="text" value="Homer TagDirectory" />
+        <repeat name="alignmentFiles" title="Alignment Files">
+          <param name="file" label="Add file" type="data" format="sam,bed" />
+        </repeat>
+    </inputs>
+    <outputs>
+        <!--<data format="homerTagDirectory" name="tagDir" label="${title} tag directory" />-->
+        <data format="html" name="tagDir" label="${title} tag directory" />
+        <data format="txt" name="out_log" label="${title}.log" />
+        <!--<data format="html" name="html_outfile" label="index" />-->
+        <!--<data format="html" hidden="True" name="html_outfile" label="index.html" />-->
+    </outputs>
+
+
+    <tests>
+        <!--<test>-->
+            <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
+            <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
+        <!--</test>-->
+    </tests>
+
+    <help>
+
+        .. class:: infomark
+
+        **Homer makeTagDirectory**
+
+        Builds a Homer tag directory from the selected alignment files; the HTML output lists the files it contains.
+
+        More information:
+        http://biowhat.ucsd.edu/homer/ngs/tagDir.html
+
+    </help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pos2bed.xml	Sun Dec 16 23:31:10 2012 -0500
@@ -0,0 +1,34 @@
+<tool id="homer_pos2bed" name="homer_pos2bed" version="0.0.2">
+    <requirements>
+        <requirement type="package">homer</requirement>
+    </requirements>
+    <description>Convert Homer peak positions to BED format (pos2bed.pl)</description>
+    <!-- No version_command is defined for this wrapper. -->
+    <command>
+        pos2bed.pl $input_peak 1&gt; $out_bed
+        2&gt; $out_log || echo "Error running pos2bed." >&amp;2
+    </command>
+    <inputs>
+        <param format="tabular" name="input_peak" type="data" label="Homer peak positions" />
+    </inputs>
+    <outputs>
+        <!-- out_bed receives the script's stdout. -->
+        <!-- out_log receives whatever the script prints on stderr. -->
+        <data format="bed" name="out_bed" label="${tool.name} on #echo os.path.splitext(str($input_peak.name))[0]#.bed" />
+        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($input_peak.name))[0]#.log" />
+    </outputs>
+    <tests>
+        <!--<test>-->
+            <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
+            <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
+        <!--</test>-->
+    </tests>
+
+    <help>
+        .. class:: infomark
+
+        **Homer pos2bed.pl**
+        http://biowhat.ucsd.edu/homer/ngs/miscellaneous.html
+    </help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Sun Dec 16 23:31:10 2012 -0500
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<tool_dependency>
+  <package name="homer">
+    <install version="1.0">
+      <actions>
+        <action type="download_by_url">http://biowhat.ucsd.edu/homer/configureHomer.pl</action> <!-- installer script, fetched over plain HTTP -->
+        <action type="shell_command">perl ./configureHomer.pl -install</action>
+        <action type="shell_command">perl ./configureHomer.pl -install hg19</action> <!-- hg19 is the only genome offered by annotatePeaks.xml -->
+        <action type="set_environment">
+          <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable> <!-- NOTE(review): assumes the installer puts executables in $INSTALL_DIR/bin; TODO confirm -->
+        </action>
+      </actions>
+    </install>
+    <readme>
+      Installs homer
+    </readme>
+  </package>
+</tool_dependency>
+