changeset 2:f59f5348d281 draft

Uploaded
author alenail
date Mon, 07 Mar 2016 16:18:10 -0500
parents e6df07575a03
children c62c2fd68a29
files ._chipsequtil-master chipsequtil-master/._.gitignore chipsequtil-master/._MANIFEST.in chipsequtil-master/._README.txt chipsequtil-master/._docs chipsequtil-master/._examples chipsequtil-master/._ez_setup.py chipsequtil-master/._install.sh chipsequtil-master/._org_settings.cfg chipsequtil-master/._org_settings.cfg.sample chipsequtil-master/._scripts chipsequtil-master/._setup.cfg chipsequtil-master/._setup.py chipsequtil-master/._src chipsequtil-master/._uninstall.py chipsequtil-master/.gitignore chipsequtil-master/MANIFEST.in chipsequtil-master/README.txt chipsequtil-master/docs/._Makefile chipsequtil-master/docs/._get_script_help.py chipsequtil-master/docs/._source chipsequtil-master/docs/Makefile chipsequtil-master/docs/get_script_help.py chipsequtil-master/docs/source/._conf.py chipsequtil-master/docs/source/._index.rst chipsequtil-master/docs/source/._module_reference.rst chipsequtil-master/docs/source/._module_src chipsequtil-master/docs/source/._quick_start.rst chipsequtil-master/docs/source/._script_reference.rst chipsequtil-master/docs/source/conf.py chipsequtil-master/docs/source/index.rst chipsequtil-master/docs/source/module_reference.rst chipsequtil-master/docs/source/module_src/._chipsequtil.rst chipsequtil-master/docs/source/module_src/._file_wrappers.rst chipsequtil-master/docs/source/module_src/._motiftools.rst chipsequtil-master/docs/source/module_src/._nib.rst chipsequtil-master/docs/source/module_src/._org_settings.rst chipsequtil-master/docs/source/module_src/._seq.rst chipsequtil-master/docs/source/module_src/._util.rst chipsequtil-master/docs/source/module_src/chipsequtil.rst chipsequtil-master/docs/source/module_src/file_wrappers.rst chipsequtil-master/docs/source/module_src/motiftools.rst chipsequtil-master/docs/source/module_src/nib.rst chipsequtil-master/docs/source/module_src/org_settings.rst chipsequtil-master/docs/source/module_src/seq.rst chipsequtil-master/docs/source/module_src/util.rst chipsequtil-master/docs/source/quick_start.rst chipsequtil-master/docs/source/script_reference.rst chipsequtil-master/examples/._mapping chipsequtil-master/examples/._nib chipsequtil-master/examples/._seq chipsequtil-master/examples/mapping/._map_to_known_gene.sh chipsequtil-master/examples/mapping/._test_peaks.xls chipsequtil-master/examples/mapping/map_to_known_gene.sh chipsequtil-master/examples/mapping/test_peaks.xls chipsequtil-master/examples/nib/._shuffled_peaks.bed chipsequtil-master/examples/nib/._test_batch_fasta.py chipsequtil-master/examples/nib/._test_nib_db.py chipsequtil-master/examples/nib/shuffled_peaks.bed chipsequtil-master/examples/nib/test_batch_fasta.py chipsequtil-master/examples/nib/test_nib_db.py chipsequtil-master/examples/seq/._test_chipsequtil_seq.py chipsequtil-master/examples/seq/test_chipsequtil_seq.py chipsequtil-master/ez_setup.py chipsequtil-master/install.sh chipsequtil-master/org_settings.cfg chipsequtil-master/org_settings.cfg.sample chipsequtil-master/scripts/._THEME.sh chipsequtil-master/scripts/._build_chipseq_infosite.py chipsequtil-master/scripts/._chipseq_pipeline.py chipsequtil-master/scripts/._chipseq_pipeline_wo_ctrl.py chipsequtil-master/scripts/._combine_gerald_stats.py chipsequtil-master/scripts/._compare_microarray_binding.py chipsequtil-master/scripts/._construct_bg_fasta.py chipsequtil-master/scripts/._create_pipeline_script.py chipsequtil-master/scripts/._extract_promoters.py chipsequtil-master/scripts/._filter_bed_by_position_count.py chipsequtil-master/scripts/._filter_gps_peaks.py 
chipsequtil-master/scripts/._filter_macs_peaks.py chipsequtil-master/scripts/._filter_mapped_known_genes.py chipsequtil-master/scripts/._generate_stats_doc.py chipsequtil-master/scripts/._gerald_stats.py chipsequtil-master/scripts/._gerald_to_bed.py chipsequtil-master/scripts/._integrate_macs_ucsc.py chipsequtil-master/scripts/._join_mapped_known_genes.py chipsequtil-master/scripts/._kg_to_gff.py chipsequtil-master/scripts/._map_intervals.py chipsequtil-master/scripts/._map_peaks_to_genes.py chipsequtil-master/scripts/._map_peaks_to_known_genes.py chipsequtil-master/scripts/._motif_scan.py chipsequtil-master/scripts/._nibFrag.py chipsequtil-master/scripts/._org_settings.py chipsequtil-master/scripts/._peaks_to_fasta.py chipsequtil-master/scripts/._plot_peak_loc_dist.py chipsequtil-master/scripts/._plot_pos_vs_neg_peaks.py chipsequtil-master/scripts/._probeset_to_known_gene.py chipsequtil-master/scripts/._rejection_sample_fasta.py chipsequtil-master/scripts/._sort_bed.py chipsequtil-master/scripts/._split_file.py chipsequtil-master/scripts/._split_qsub.py chipsequtil-master/scripts/._wait_for_jobid.py chipsequtil-master/scripts/._wait_for_qsub.py chipsequtil-master/scripts/._wqsub.py chipsequtil-master/scripts/._wqsub_drmaa.py chipsequtil-master/scripts/THEME.sh chipsequtil-master/scripts/build_chipseq_infosite.py chipsequtil-master/scripts/chipseq_pipeline.py chipsequtil-master/scripts/chipseq_pipeline_wo_ctrl.py chipsequtil-master/scripts/combine_gerald_stats.py chipsequtil-master/scripts/compare_microarray_binding.py chipsequtil-master/scripts/construct_bg_fasta.py chipsequtil-master/scripts/create_pipeline_script.py chipsequtil-master/scripts/extract_promoters.py chipsequtil-master/scripts/filter_bed_by_position_count.py chipsequtil-master/scripts/filter_gps_peaks.py chipsequtil-master/scripts/filter_macs_peaks.py chipsequtil-master/scripts/filter_mapped_known_genes.py chipsequtil-master/scripts/generate_stats_doc.py chipsequtil-master/scripts/gerald_stats.py chipsequtil-master/scripts/gerald_to_bed.py chipsequtil-master/scripts/integrate_macs_ucsc.py chipsequtil-master/scripts/join_mapped_known_genes.py chipsequtil-master/scripts/kg_to_gff.py chipsequtil-master/scripts/map_intervals.py chipsequtil-master/scripts/map_peaks_to_genes.py chipsequtil-master/scripts/map_peaks_to_known_genes.py chipsequtil-master/scripts/motif_scan.py chipsequtil-master/scripts/nibFrag.py chipsequtil-master/scripts/org_settings.py chipsequtil-master/scripts/peaks_to_fasta.py chipsequtil-master/scripts/plot_peak_loc_dist.py chipsequtil-master/scripts/plot_pos_vs_neg_peaks.py chipsequtil-master/scripts/probeset_to_known_gene.py chipsequtil-master/scripts/rejection_sample_fasta.py chipsequtil-master/scripts/sort_bed.py chipsequtil-master/scripts/split_file.py chipsequtil-master/scripts/split_qsub.py chipsequtil-master/scripts/wait_for_jobid.py chipsequtil-master/scripts/wait_for_qsub.py chipsequtil-master/scripts/wqsub.py chipsequtil-master/scripts/wqsub_drmaa.py chipsequtil-master/setup.cfg chipsequtil-master/setup.py chipsequtil-master/src/._chipsequtil chipsequtil-master/src/chipsequtil/.___init__.py chipsequtil-master/src/chipsequtil/._chipsequtil.py chipsequtil-master/src/chipsequtil/._motiftools.py chipsequtil-master/src/chipsequtil/._nib.py chipsequtil-master/src/chipsequtil/._plotting.py chipsequtil-master/src/chipsequtil/._sampling.py chipsequtil-master/src/chipsequtil/._seq.py chipsequtil-master/src/chipsequtil/._util.py chipsequtil-master/src/chipsequtil/__init__.py 
chipsequtil-master/src/chipsequtil/chipsequtil.py chipsequtil-master/src/chipsequtil/motiftools.py chipsequtil-master/src/chipsequtil/nib.py chipsequtil-master/src/chipsequtil/plotting.py chipsequtil-master/src/chipsequtil/sampling.py chipsequtil-master/src/chipsequtil/seq.py chipsequtil-master/src/chipsequtil/util.py chipsequtil-master/uninstall.py
diffstat 161 files changed, 12858 insertions(+), 0 deletions(-)
Binary file ._chipsequtil-master has changed
Binary file chipsequtil-master/._.gitignore has changed
Binary file chipsequtil-master/._MANIFEST.in has changed
Binary file chipsequtil-master/._README.txt has changed
Binary file chipsequtil-master/._docs has changed
Binary file chipsequtil-master/._examples has changed
Binary file chipsequtil-master/._ez_setup.py has changed
Binary file chipsequtil-master/._install.sh has changed
Binary file chipsequtil-master/._org_settings.cfg has changed
Binary file chipsequtil-master/._org_settings.cfg.sample has changed
Binary file chipsequtil-master/._scripts has changed
Binary file chipsequtil-master/._setup.cfg has changed
Binary file chipsequtil-master/._setup.py has changed
Binary file chipsequtil-master/._src has changed
Binary file chipsequtil-master/._uninstall.py has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/.gitignore	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,5 @@
+*.swp
+build
+src/chipsequtil/org_settings.cfg
+dist
+*.pyc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/MANIFEST.in	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,5 @@
+include README.txt
+include org_settings.cfg.sample
+include setup.*
+recursive-include scripts *.py
+recursive-include src *.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/README.txt	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,29 @@
+Installation
+============
+
+Before installing, copy *org_settings.cfg.sample* to *org_settings.cfg*:
+
+  $> cp org_settings.cfg.sample org_settings.cfg
+
+In the new *org_settings.cfg*, create or edit the paths and categories
+appropriate for your system.  When you have configured the file to your
+satisfaction, copy it into the package source directory:
+
+  $> cp org_settings.cfg src/chipsequtil/
+
+You can then install the package with:
+
+  $> python setup.py install
+
+
+If you'd like to install the package to a non-system directory (e.g., if you
+don't have permission to install system-wide packages), you can provide the
+*--prefix=PATH* argument to the install command:
+
+  $> python setup.py install --prefix=/path/to/dir
+
+Remember to add */path/to/dir* to your PYTHONPATH environment variable if it
+is not already there.  If you wish to add more system-wide paths/organisms to
+org_settings.cfg, either edit the file in the source directory as above and
+reinstall (good way) or edit the file in the directory where the package is
+installed (less good way).
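As a worked example of the non-system install above (the prefix is arbitrary and
the site-packages path assumes Python 2.7; adjust both for your environment)::

  $> python setup.py install --prefix=$HOME/local
  $> export PYTHONPATH=$HOME/local/lib/python2.7/site-packages:$PYTHONPATH
  $> python -c "import chipsequtil; print chipsequtil.__file__"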
Binary file chipsequtil-master/docs/._Makefile has changed
Binary file chipsequtil-master/docs/._get_script_help.py has changed
Binary file chipsequtil-master/docs/._source has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/docs/Makefile	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,89 @@
+# Makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS    =
+SPHINXBUILD   = sphinx-build
+PAPER         =
+BUILDDIR      = build
+
+# Internal variables.
+PAPEROPT_a4     = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
+
+.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest
+
+help:
+	@echo "Please use \`make <target>' where <target> is one of"
+	@echo "  html      to make standalone HTML files"
+	@echo "  dirhtml   to make HTML files named index.html in directories"
+	@echo "  pickle    to make pickle files"
+	@echo "  json      to make JSON files"
+	@echo "  htmlhelp  to make HTML files and a HTML help project"
+	@echo "  qthelp    to make HTML files and a qthelp project"
+	@echo "  latex     to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+	@echo "  changes   to make an overview of all changed/added/deprecated items"
+	@echo "  linkcheck to check all external links for integrity"
+	@echo "  doctest   to run all doctests embedded in the documentation (if enabled)"
+
+clean:
+	-rm -rf $(BUILDDIR)/*
+
+html:
+	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+
+dirhtml:
+	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
+
+pickle:
+	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
+	@echo
+	@echo "Build finished; now you can process the pickle files."
+
+json:
+	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
+	@echo
+	@echo "Build finished; now you can process the JSON files."
+
+htmlhelp:
+	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
+	@echo
+	@echo "Build finished; now you can run HTML Help Workshop with the" \
+	      ".hhp project file in $(BUILDDIR)/htmlhelp."
+
+qthelp:
+	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
+	@echo
+	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
+	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
+	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/ChIPSeqUtil.qhcp"
+	@echo "To view the help file:"
+	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/ChIPSeqUtil.qhc"
+
+latex:
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo
+	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
+	@echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \
+	      "run these through (pdf)latex."
+
+changes:
+	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
+	@echo
+	@echo "The overview file is in $(BUILDDIR)/changes."
+
+linkcheck:
+	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
+	@echo
+	@echo "Link check complete; look for any errors in the above output " \
+	      "or in $(BUILDDIR)/linkcheck/output.txt."
+
+doctest:
+	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
+	@echo "Testing of doctests in the sources finished, look at the " \
+	      "results in $(BUILDDIR)/doctest/output.txt."
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/docs/get_script_help.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+
+import glob
+import signal
+import time
+from subprocess import Popen, PIPE
+from textwrap import TextWrapper
+
+class Alarm(Exception):
+    pass
+
+def alarm_handler(signum, frame):
+    raise Alarm
+
+signal.signal(signal.SIGALRM, alarm_handler)
+
+scripts = [#'../scripts/build_chipseq_infosite.py',
+           '../scripts/chipseq_pipeline.py',
+           #'../scripts/combine_gerald_stats.py',
+           #'../scripts/compare_microarray_binding.py',
+           '../scripts/create_pipeline_script.py',
+           '../scripts/extract_promoters.py',
+           '../scripts/filter_bed_by_position_count.py',
+           '../scripts/filter_macs_peaks.py',
+           '../scripts/filter_gps_peaks.py',
+           '../scripts/filter_mapped_known_genes.py',
+           #'../scripts/generate_stats_doc.py',
+           '../scripts/gerald_stats.py',
+           '../scripts/gerald_to_bed.py',
+           #'../scripts/integrate_macs_ucsc.py',
+           '../scripts/join_mapped_known_genes.py',
+           '../scripts/map_intervals.py',
+           '../scripts/map_peaks_to_genes.py',
+           '../scripts/map_peaks_to_known_genes.py',
+           '../scripts/motif_scan.py',
+           '../scripts/nibFrag.py',
+           '../scripts/org_settings.py',
+           '../scripts/peaks_to_fasta.py',
+           '../scripts/plot_pos_vs_neg_peaks.py',
+           '../scripts/plot_peak_loc_dist.py',
+           #'../scripts/probeset_to_known_gene.py',
+           '../scripts/rejection_sample_fasta.py',
+           '../scripts/sort_bed.py',
+           #'../scripts/split_file.py',
+           #'../scripts/split_qsub.py',
+           #'../scripts/THEME.sh',
+           #'../scripts/wait_for_qsub.py',
+           '../scripts/wait_for_jobid.py',
+           '../scripts/wqsub.py',
+           '../scripts/wqsub_drmaa.py',
+           ]
+
+if __name__ == '__main__' :
+
+    tw = TextWrapper(initial_indent="   ",subsequent_indent="   ")
+    script_help_out = ''
+    refs = ''
+    for script in scripts :
+        cmd = 'python %s -h'%script
+        p = Popen(cmd,shell=True,stdout=PIPE,stderr=PIPE)
+
+        stdout, stderr = None, None
+        signal.alarm(3)  # 3 seconds
+        try:
+            stdout, stderr = p.communicate()
+            signal.alarm(0)  # reset the alarm
+        except Alarm:
+            pass
+
+        script_str = script.replace('../scripts/','')
+
+
+        refs += '  - :ref:`%(script_str)s <%(script_str)s>`\n'%{'script_str':script_str}
+        script_help_out += '.. _%s:\n\n'%script_str
+        script_help_out += '%s::\n\n'%script_str
+        if stderr is None :
+            script_help_out += tw.fill('empty docstring\n')
+        else :
+            script_help_out += '\n'.join(['   '+x for x in stdout.split('\n')])
+            script_help_out += '\n'.join(['   '+x for x in stderr.split('\n')])
+        script_help_out += '\n\n'
+        script_help_out += ':ref:`top <top>`\n\n'
+
+    rst_str = """\
+Illumina pipeline script reference
+==================================
+
+The following is the output of the scripts provided by this package when invoked
+on the command line with *-h*.
+
+.. _top:
+
+Scripts:
+%(refs)s
+
+%(script_help_out)s
+"""%{'refs':refs,'script_help_out':script_help_out}
+
+    print rst_str
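This script presumably regenerates *script_reference.rst* (included later in
this changeset) by capturing each script's *-h* output; a plausible invocation
from the *docs/* directory, under that assumption::

  $> python get_script_help.py > source/script_reference.rst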
Binary file chipsequtil-master/docs/source/._conf.py has changed
Binary file chipsequtil-master/docs/source/._index.rst has changed
Binary file chipsequtil-master/docs/source/._module_reference.rst has changed
Binary file chipsequtil-master/docs/source/._module_src has changed
Binary file chipsequtil-master/docs/source/._quick_start.rst has changed
Binary file chipsequtil-master/docs/source/._script_reference.rst has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/docs/source/conf.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,198 @@
+# -*- coding: utf-8 -*-
+#
+# ChIPSeqUtil documentation build configuration file, created by
+# sphinx-quickstart on Mon Oct 31 13:12:52 2011.
+#
+# This file is execfile()d with the current directory set to its containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import sys, os
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#sys.path.append(os.path.abspath('.'))
+
+# -- General configuration -----------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be extensions
+# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.pngmath']
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix of source filenames.
+source_suffix = '.rst'
+
+# The encoding of source files.
+#source_encoding = 'utf-8'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'ChIPSeqUtil'
+copyright = u'2011, Adam Labadorf'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = '1.5'
+# The full version, including alpha/beta/rc tags.
+release = '1.5'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#today = ''
+# Else, today_fmt is used as the format for a strftime call.
+#today_fmt = '%B %d, %Y'
+
+# List of documents that shouldn't be included in the build.
+#unused_docs = []
+
+# List of directories, relative to source directory, that shouldn't be searched
+# for source files.
+exclude_trees = []
+
+# The reST default role (used for this markup: `text`) to use for all documents.
+#default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+#modindex_common_prefix = []
+
+
+# -- Options for HTML output ---------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  Major themes that come with
+# Sphinx are currently 'default' and 'sphinxdoc'.
+html_theme = 'default'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further.  For a list of options available for each theme, see the
+# documentation.
+#html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+#html_theme_path = []
+
+# The name for this set of Sphinx documents.  If None, it defaults to
+# "<project> v<release> documentation".
+#html_title = None
+
+# A shorter title for the navigation bar.  Default is the same as html_title.
+#html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#html_logo = None
+
+# The name of an image file (within the static path) to use as favicon of the
+# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+#html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#html_additional_pages = {}
+
+# If false, no module index is generated.
+#html_use_modindex = True
+
+# If false, no index is generated.
+#html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+#html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#html_show_sourcelink = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it.  The value of this option must be the
+# base URL from which the finished HTML is served.
+#html_use_opensearch = ''
+
+# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = ''
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'ChIPSeqUtildoc'
+
+
+# -- Options for LaTeX output --------------------------------------------------
+
+# The paper size ('letter' or 'a4').
+#latex_paper_size = 'letter'
+
+# The font size ('10pt', '11pt' or '12pt').
+#latex_font_size = '10pt'
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title, author, documentclass [howto/manual]).
+latex_documents = [
+  ('index', 'ChIPSeqUtil.tex', u'ChIPSeqUtil Documentation',
+   u'Adam Labadorf', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#latex_use_parts = False
+
+# Additional stuff for the LaTeX preamble.
+#latex_preamble = ''
+
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+
+# If false, no module index is generated.
+#latex_use_modindex = True
+
+
+# Example configuration for intersphinx: refer to the Python standard library.
+intersphinx_mapping = {'http://docs.python.org/': None}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/docs/source/index.rst	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,51 @@
+.. ChIPSeqUtil documentation master file, created by
+   sphinx-quickstart on Mon Oct 31 13:12:52 2011.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+Welcome to ChIPSeqUtil's documentation!
+=======================================
+
+ChIPSeqUtil is a python module and accompanying set of scripts used in the
+analysis of ChIPSeq short read data.  It is designed as a 'push-button' solution
+that is easy for non-linux-experts to use but is flexible and extensible enough
+to accommodate special cases when they inevitably arise. The default pipeline
+performs the following analysis steps:
+
+1. runs a peak caller (MACS by default)
+2. optionally creates and stages bigwig files for viewing on UCSC Genome Browser
+3. filters peaks based on confidence criteria (e.g. p-value)
+4. maps peaks to genes using UCSC knownGene annotations
+5. performs hypothesis-based motif analysis using TRANSFAC motifs
+6. builds a web page consolidating results
+
+ChIPSeqUtil has the following dependencies:
+
+  - MACS (or some other peak caller)
+  - TAMO
+  - reStUtil
+  - pypeline
+  - bx-python
+
+.. note:: add links to these bullets
+
+ChIPSeqUtil has only been tested on Ubuntu-based Linux distributions; no
+guarantee is made for other operating systems.  That said, some or all of it
+may still work.
+
+Contents:
+
+.. toctree::
+   :maxdepth: 2
+
+   quick_start
+   script_reference
+   module_reference
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/docs/source/module_reference.rst	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,11 @@
+
+Module Reference
+================
+
+Reference documentation for the modules in the chipsequtil python package.
+
+.. toctree::
+
+    module_src/chipsequtil
+    module_src/nib
+    module_src/seq
Binary file chipsequtil-master/docs/source/module_src/._chipsequtil.rst has changed
Binary file chipsequtil-master/docs/source/module_src/._file_wrappers.rst has changed
Binary file chipsequtil-master/docs/source/module_src/._motiftools.rst has changed
Binary file chipsequtil-master/docs/source/module_src/._nib.rst has changed
Binary file chipsequtil-master/docs/source/module_src/._org_settings.rst has changed
Binary file chipsequtil-master/docs/source/module_src/._seq.rst has changed
Binary file chipsequtil-master/docs/source/module_src/._util.rst has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/docs/source/module_src/chipsequtil.rst	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,27 @@
+
+chipsequtil
+===========
+
+Contents
+--------
+
+.. toctree::
+
+    file_wrappers
+    org_settings
+
+
+.. automodule:: chipsequtil
+    :members:
+    :undoc-members:
+
+Miscellaneous Functions
+-----------------------
+
+.. autofunction:: get_file_parts
+.. autofunction:: parse_number
+.. autofunction:: gerald_to_bed
+.. autofunction:: reverse_complement
+.. autofunction:: get_gc_content
+.. autofunction:: get_gc_content_distribution
+.. autofunction:: get_size_distribution
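A minimal sketch of two of the miscellaneous functions listed above; the
signatures and return values are assumptions based on the names alone
(Python 2, matching the package)::

  >>> from chipsequtil import reverse_complement, get_gc_content
  >>> reverse_complement('GGAT')  # assumed: returns the reverse complement
  'ATCC'
  >>> get_gc_content('GGAT')      # assumed: returns GC fraction in [0, 1]
  0.5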
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/docs/source/module_src/file_wrappers.rst	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,28 @@
+
+File Wrappers
+=============
+
+.. module:: chipsequtil
+
+.. autoclass:: SmartFileIter
+    :members:
+
+SmartFileIter-based classes
+---------------------------
+
+.. autoclass:: BEDFile
+.. autoclass:: GPSFile
+.. autoclass:: MACSFile
+.. autoclass:: KnownGeneFile
+
+Other wrappers
+--------------
+
+Not all of the file wrappers in this package have been converted to SmartFileIters
+yet; these still work but are less robust.
+
+.. autoclass:: AffyBiocFile
+.. autoclass:: GERALDOutput
+    :members:
+.. autoclass:: RefGeneFile
+
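A hedged sketch of iterating one of the SmartFileIter-based classes; the record
access style and field names are hypothetical, since the page above only names
the classes (Python 2)::

  from chipsequtil import BEDFile

  # hypothetical: assume each record maps standard BED field names to values
  for rec in BEDFile('peaks.bed'):
      print rec['chrom'], rec['chromStart'], rec['chromEnd']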
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/docs/source/module_src/motiftools.rst	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,56 @@
+
+Motif Classes and Functions
+===========================
+
+This module is essentially a copy of TAMO.MotifTools, moved into chipsequtil
+for strategic reasons.
+
+.. automodule:: chipsequtil.motiftools
+
+The Motif Class
+---------------
+
+.. autoclass:: Motif
+   :members:
+
+Functions
+---------
+
+.. .. autofunction::  revcomplement
+.. autofunction::  Motif_from_ll
+.. autofunction::  Motif_from_counts
+.. autofunction::  Motif_from_text
+.. autofunction::  copy
+.. .. autofunction::  minwindowdiff
+.. .. autofunction::  minaligndiff
+.. autofunction::  diff
+.. autofunction::  maskdiff
+.. autofunction::  infomaskdiff
+.. autofunction::  diverge
+.. autofunction::  bestseqs
+.. autofunction::  seqs2fasta
+.. autofunction::  top_nmers
+.. autofunction::  m_matches
+.. autofunction::  compare_seqs
+.. autofunction::  shuffle_bases
+.. autofunction::  random_diff_avestd
+.. autofunction::  random_motif
+.. autofunction::  toDict
+.. autofunction::  toDictVect
+.. autofunction::  submotif
+.. autofunction::  shuffledP
+.. autofunction::  revcompmotif
+.. autofunction::  sum
+.. autofunction::  giflogo
+.. autofunction::  seqlogo
+.. autofunction::  merge
+.. autofunction::  avestd
+.. autofunction::  load
+.. autofunction::  save_motifs
+.. autofunction::  print_motif
+.. autofunction::  print_motifs
+.. autofunction::  nlog10
+.. autofunction::  txt2motifs
+.. autofunction::  pickletxt2motifs
+.. autofunction::  sortby
+.. .. autoclass:: MotifToolsException
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/docs/source/module_src/nib.rst	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,36 @@
+
+.. module:: chipsequtil.nib
+
+nibFrag API
+===========
+
+These functions and classes are a native python implementation of Jim Kent's nibFrag
+utility and file format.  The scripts and classes read *.nib* files and can
+extract sequences from them as fast as or faster than the standalone tools,
+and also make sequence data efficiently accessible from within python scripts.
+No utility to create *.nib* files is provided; the original source tools must
+be used and are not included in this distribution.  They may be found on
+`Jim Kent's homepage <http://users.soe.ucsc.edu/~kent/>`_.
+
+
+The NibDB Class
+---------------
+
+.. autoclass:: NibDB
+    :members:
+
+Functions
+---------
+
+Most of these functions should not be used directly; rather, they are called
+by the NibDB class and implement the gritty details of reading *.nib* files.
+Use the NibDB class instead unless you know what you're doing.
+
+
+.. autofunction:: get_nib
+.. autofunction:: get_nib_batch
+.. autofunction:: get_nib_seq
+.. autofunction:: get_nib_header
+.. autofunction:: get_nib_header_batch
+.. autofunction:: validate_nib_file
+.. autofunction:: get_nib_seq_batch
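A sketch of the intended NibDB workflow; the constructor argument and method
below are hypothetical illustrations, not the class's verified API (the
autoclass directive above pulls the real signatures from the source)::

  from chipsequtil.nib import NibDB

  # hypothetical: point the DB at a directory of .nib files, one per chromosome
  db = NibDB(nib_dirs=['/nfs/genomes/mouse_gp_jul_07'])
  # hypothetical: extract a subsequence by chromosome and coordinates
  header, seq = db.get_fasta('chr1', 3000000, 3000200)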
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/docs/source/module_src/org_settings.rst	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,91 @@
+
+The `org_settings` System
+=========================
+
+Many scripts in this package require a number of different source files that all
+correspond to a single reference genome (*e.g.* mm9).  The `org_settings`
+functions and the *org_settings.py* script bundle together the sets of
+paths and variables that correspond to different references in a customizable,
+accessible way.  The bundles are configured as package-wide settings on install
+and can be overridden by a user-specific configuration file.  The format of the
+file follows the conventions in `configparser`_.
+
+.. _configparser: http://docs.python.org/library/configparser.html
+
+Reference genomes are specified in a configuration file as follows::
+
+    [mm9]
+    description=UCSC mm9 (July '07 build) with full TRANSFAC hypothesis set
+    genome=mm9
+    genome_dir=/nfs/genomes/mouse_gp_jul_07
+    genome_size=2107000000
+    ucsc_chrom_sizes=%(genome_dir)s/%(genome)s.chrom.sizes
+    annotation_path=%(genome_dir)s/anno/refFlat-%(genome)s.txt
+    refgene_anno_path=%(genome_dir)s/anno/refFlat-%(genome)s.txt
+    known_gene_anno_path=%(genome_dir)s/anno/knownGene-%(genome)s.txt
+    known_gene_xref_path=%(genome_dir)s/anno/kgXref-%(genome)s.txt
+    affy_to_known_path=%(genome_dir)s/anno/knownToMOE43-%(genome)s.txt
+    theme_hypotheses=/nfs/vendata/cwng/TRANSFAC/2010_transfac_vert_all_filtic9.tamo
+    theme_markov=/nfs/data/cwng/chipseq/hypotheses/Mouse.markov
+
+This will make **mm9** available as an organism reference to the `org_settings`
+functions.  The *ucsc_chrom_sizes*, *annotation_path*, *refgene_anno_path*,
+*known_gene_anno_path*, *known_gene_xref_path*, and *affy_to_known_path* files
+are downloaded from the organism annotation databases at
+http://hgdownload.cse.ucsc.edu/downloads.html.  The fields in the above example
+are all required for the package to work properly; however, any additional
+variables may be added as desired.
+
+API Functions
+-------------
+
+.. module:: chipsequtil
+
+.. autofunction:: get_org_settings
+.. autofunction:: get_all_settings
+.. autofunction:: get_global_settings
+.. autofunction:: get_local_settings
+.. autofunction:: check_org_settings
+
+The *org_settings.py* script
+----------------------------
+
+The script *org_settings.py* is a command line interface into the `org_settings`
+system.  It has the following usage::
+
+  $> org_settings.py -h
+  Usage: org_settings.py [options] [<org key> [<org setting>]]
+
+  Tool for retrieving sets of organism-specific settings and paths. Original
+  paths are set at install time, and can be overridden in the file ~/.org
+  settings.cfg. Allows output of settings in a variety of shell environment
+  syntaxes.  The tool attempts to guess which shell environment is being used by
+  examining the SHELL environment variable unless explicitly set.  When run
+  without an argument, returns a listing of all settings available.
+
+  Options:
+    -h, --help            show this help message and exit
+    -s SYNTAX, --syntax=SYNTAX
+                          syntax flavor                   of output to produce
+                          [default: %auto]
+    -l, --list            print                   all available settings for
+                          human consumption
+  $> org_settings.py -s bash mm9 genome_dir
+  /nfs/genomes/mouse_gp_jul_07
+  $>
+
+If you use bash as your shell, you can use shell expansion to conveniently build
+commands such as the following::
+
+  $> map_peaks_to_known_genes.py $(org_settings.py mm9 known_gene_anno_path) \
+     $(org_settings.py mm9 known_gene_xref_path) macs_peaks.xls
+
+Installing
+----------
+
+The file *org_settings.cfg* lives in the root directory of the source
+distribution.  For org settings that should be available system-wide, modify
+this file and copy it into the *src/chipsequtil/* directory before
+installation.  Alternatively, users may create the file *.org_settings.cfg* in
+their home directories and add sections like the one above to customize their
+own sets of variables.
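A minimal sketch of the API functions documented above, assuming
get_org_settings('mm9') returns the [mm9] section as a dict of setting names
to values (Python 2)::

  from chipsequtil import get_org_settings

  mm9 = get_org_settings('mm9')
  print mm9['genome_dir']             # /nfs/genomes/mouse_gp_jul_07
  print mm9['known_gene_anno_path']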
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/docs/source/module_src/seq.rst	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,34 @@
+
+.. module:: chipsequtil.seq
+
+Sequence data functions and classes
+===================================
+
+This module provides simple functions for reading FASTA and FASTQ formatted
+files.  Use *fasta_itr* and *fastq_itr* when it is unnecessary or undesirable
+to load all sequences into memory.  The *FASTAFile* and *FASTQFile* classes
+store all sequence information in memory, but allow efficient dictionary-style
+random access to sequences and quality scores as well as repeated whole-file
+iteration.
+
+Functions
+---------
+
+.. autofunction:: fasta_itr
+.. autofunction:: fasta_to_dict
+.. autofunction:: write_fasta_to_file
+
+.. autofunction:: fastq_itr
+.. autofunction:: fastq_to_dict
+.. autofunction:: write_fastq_to_file
+
+Classes
+-------
+
+.. autoclass:: FASTAFile
+    :members:
+
+.. autoclass:: FASTQFile
+    :members:
+
+
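A short sketch of the two access styles described above; the exact record
shapes are assumptions, as the page only names the functions and classes
(Python 2)::

  from chipsequtil.seq import fasta_itr, FASTAFile

  # streaming: assumed to yield one record at a time without loading the file
  for header, seq in fasta_itr('peaks.fa'):
      print header, len(seq)

  # in-memory: assumed dictionary-style access by record header
  fa = FASTAFile('peaks.fa')
  print len(fa['chr1:100-300'])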
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/docs/source/module_src/util.rst	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,4 @@
+
+Utility functions and classes
+=============================
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/docs/source/quick_start.rst	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,5 @@
+
+Quick Start Documentation
+=========================
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/docs/source/script_reference.rst	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,892 @@
+Illumina pipeline script reference
+==================================
+
+The following is the output of the scripts provided by this package when invoked
+on the command line with *-h*.
+
+.. _top:
+
+Scripts:
+  - :ref:`chipseq_pipeline.py <chipseq_pipeline.py>`
+  - :ref:`create_pipeline_script.py <create_pipeline_script.py>`
+  - :ref:`extract_promoters.py <extract_promoters.py>`
+  - :ref:`filter_bed_by_position_count.py <filter_bed_by_position_count.py>`
+  - :ref:`filter_macs_peaks.py <filter_macs_peaks.py>`
+  - :ref:`filter_gps_peaks.py <filter_gps_peaks.py>`
+  - :ref:`filter_mapped_known_genes.py <filter_mapped_known_genes.py>`
+  - :ref:`gerald_stats.py <gerald_stats.py>`
+  - :ref:`gerald_to_bed.py <gerald_to_bed.py>`
+  - :ref:`join_mapped_known_genes.py <join_mapped_known_genes.py>`
+  - :ref:`map_intervals.py <map_intervals.py>`
+  - :ref:`map_peaks_to_genes.py <map_peaks_to_genes.py>`
+  - :ref:`map_peaks_to_known_genes.py <map_peaks_to_known_genes.py>`
+  - :ref:`motif_scan.py <motif_scan.py>`
+  - :ref:`nibFrag.py <nibFrag.py>`
+  - :ref:`org_settings.py <org_settings.py>`
+  - :ref:`peaks_to_fasta.py <peaks_to_fasta.py>`
+  - :ref:`plot_pos_vs_neg_peaks.py <plot_pos_vs_neg_peaks.py>`
+  - :ref:`plot_peak_loc_dist.py <plot_peak_loc_dist.py>`
+  - :ref:`rejection_sample_fasta.py <rejection_sample_fasta.py>`
+  - :ref:`sort_bed.py <sort_bed.py>`
+  - :ref:`wait_for_jobid.py <wait_for_jobid.py>`
+  - :ref:`wqsub.py <wqsub.py>`
+  - :ref:`wqsub_drmaa.py <wqsub_drmaa.py>`
+
+
+.. _chipseq_pipeline.py:
+
+chipseq_pipeline.py::
+
+   Usage: chipseq_pipeline.py [options] <organism> <experiment alignment filename> [<control alignment filename>]
+   
+   1st generation ChIPSeq analysis pipeline:
+   
+     - runs MACS to find peaks
+     - sorts peaks by pvalue and isolates top *n*
+     - maps peaks to genes
+     - extracts fasta files for gene peaks in experiments
+     - constructs background sequences matching foreground distribution
+     - runs THEME.py on input sequences w/ refinement
+     - builds an infosite with stats from this analysis
+   
+   Control input file is optional.  The *organism* argument is passed to the
+   *org_settings.py* command to specify organism-specific parameters; ensure
+   that the following commands return valid paths:
+   
+   If running MACS:
+    - org_settings.py <organism> genome_size
+    - org_settings.py <organism> genome_dir
+    - org_settings.py <organism> refgene_anno_path
+   
+   If running THEME:
+    - org_settings.py <organism> theme_hypotheses
+    - org_settings.py <organism> theme_markov
+   
+   
+   
+   Options:
+     -h, --help            show this help message and exit
+     --auto                run all steps non-interactively (for batch mode, e.g.)
+     --steplist=STEPLIST   with --auto, run specific steps
+     --exp-name=EXP_NAME   name for the experiment/pipeline, used for convenience
+                           [default: current directory name]
+     --bed-args=BED_ARGS   double quote wrapped arguments for gerald_to_bed.py
+                           [default: --stdout --chromo-strip=.fa]
+     --macs-exec=MACS_EXEC
+                           the executable to use for MACS, if not an absolute
+                           path it needs to be on your shell environment path
+                           [default: macs14]
+     --macs-args=MACS_ARGS
+                           double quote wrapped arguments for macs, only changing
+                           --mfold, --tsize, --bw, and --pvalue recommended
+                           [default: --pvalue=1e-5]
+     --map-args=MAP_ARGS   double quote wrapped arguments for mapping peaks to
+                           genes [default: --tss --upstream-window=10000
+                           --downstream-window=10000]
+     --filter-peaks-args=FILTER_PEAKS_ARGS
+                           double quote wrapped arguments for
+                           filter_macs_peaks.py [default: --sort-by=pvalue
+                           --top=1000 -f 'tags>20']
+     --filter-neg-peaks-args=FILTER_NEG_PEAKS_ARGS
+                           double quote wrapped arguments for
+                           filter_macs_peaks.py applied to negative peaks
+                           [default: -f 'tags>20']
+     --peaks-to-fa-args=PEAKS_TO_FA_ARGS
+                           double quote wrapped arguments for peaks_to_fasta.py
+                           [default: --fixed-peak-width=200]
+     --bg-exec=BG_EXEC     the executable to use for generating background
+                           sequences for THEME, if not an absolute path it needs
+                           to be on your shell environment path [default:
+                           rejection_sample_fasta.py]
+     --bg-args=BG_ARGS     double quote wrapped arguments for background sequence
+                           generation utility [default: --num-seq=2.1x]
+     --theme-args=THEME_ARGS
+                           double quote wrapped arguments for THEME.py [default:
+                           --beta=0.7 --cv=5 --trials=25]
+     --motif-pval-cutoff=MOTIF_PVAL
+                           the p-value cutoff for sending non-refined enriched
+                           motifs to THEME for refinement
+     --parallelize         parallelize portions of the pipeline using qsub, only
+                           works from SGE execution hosts
+     --ucsc                perform tasks for automated integration with UCSC
+                           genome browser [default:False]
+     --build-infosite-args=INFOSITE_ARGS
+                           arguments to pass to build_chipseq_infosite.py
+                           [default: None]
+   
+     UCSC Integration Options (with --ucsc):
+       --stage-dir=STAGE_DIR
+                           root directory where UCSC integration files should be
+                           made available [default: ./]
+       --stage-url=STAGE_URL
+                           URL where UCSC integration files will be made
+                           available over the web [default: http://localhost/]
+   
+   Note: it is advised to leave the --*-args arguments unchanged
+   unless you really know what you're doing.
+      
+
+:ref:`top <top>`
+
+.. _create_pipeline_script.py:
+
+create_pipeline_script.py::
+
+   This is an interactive script that creates an executable script to use
+   for ChIPSeq analyses. When prompted for experiment and control files,
+   tab completion is available a la bash or tcsh shells. Press Ctrl-C at
+   any time to quit.
+   Usage: create_pipeline_script.py
+   
+   Script for creating a custom run script for ChIPSeq/DNAse hypersensitivity
+   experiments.  User is asked for paths and settings required for ChIPSeq
+   analysis using the *chipseq_pipeline.py* utility and produces an executable
+   run script with helpful information on how to run it.  Also creates a JSON
+   formatted file containing all the parameters for this pipeline run.
+   
+   Options:
+     -h, --help  show this help message and exit
+   
+   Note: this script only works in Unix-style environments
+      
+   ================= ChIPSeq Experiment Pipeline Script Generator =================
+   
+
+:ref:`top <top>`
+
+.. _extract_promoters.py:
+
+extract_promoters.py::
+
+   Usage: extract_promoters.py [options] <organism>
+   
+   Extract the promoter sequences in FASTA format from all genes
+   or a list of genes specified in an input file.  Gene annotation is RefGene
+   corresponding to the organism passed in, paths returned by:
+   
+   $> org_settings.py <organism> refgene_anno_path
+   $> org_settings.py <organism> genome_dir
+   
+   must be valid.
+   
+   Options:
+     -h, --help            show this help message and exit
+     -u UPSTREAM, --upstream=UPSTREAM
+                           upstream window from TSS to extract [default: 3000]
+     -d DOWNSTREAM, --downstream=DOWNSTREAM
+                           downstream window from TSS to extract [default: 1000]
+     -l GENE_LIST, --gene-list=GENE_LIST
+                           file containing a list of gene identifiers to extract,
+                           one per line [default: none]
+     -t GENE_TYPE, --gene-type=GENE_TYPE
+                           type of gene identifier in gene list, choose from
+                           ['symbol', 'refgene'] [default: symbol]
+     -o OUTPUT, --output=OUTPUT
+                           file to write fasta records to [default: stdout]
+      
+
+:ref:`top <top>`
+
+.. _filter_bed_by_position_count.py:
+
+filter_bed_by_position_count.py::
+
+   Usage: filter_bed_by_position_count.py [options] <bed file>
+   
+   Analyze BED file and filter out alignments above some threshold that align to
+   a single genomic position.
+   
+   Options:
+     -h, --help            show this help message and exit
+     -n MAX_COUNT, --max-count=MAX_COUNT
+                           max tag count at a given position, filter above
+                           [default: 5]
+     --output=OUTPUT       write output to file
+   
+   Note: only works if BED file is sorted!
+      
+
+:ref:`top <top>`
+
+.. _filter_macs_peaks.py:
+
+filter_macs_peaks.py::
+
+   Usage: filter_macs_peaks.py [options] <MACS peak file>
+   
+   Filter MACS peaks by supplied criteria.  Available filter features are:
+   
+   length
+   tags
+   pvalue
+   fold_enrichment
+   fdr
+   
+   Filters are provided as expressions using the [-f |--filter] option, e.g. the
+   command
+   
+   filter_macs_peaks.py -f "tags>100" --filter="pvalue<=1e-9"
+   --filter="100<length<=200" <MACS peak file>
+   
+   finds only peaks with more than 100 tags, a pvalue of less than 1e-9, and a
+   length between 100, exclusive, and 200, inclusive.  Any number of filters may
+   be provided, and only peaks that match *all* filters pass.  User is warned if
+   filters result in zero results.  Only inequality operators are valid.
+   Invoking with no filter arguments returns all peaks.  To sort, use the --sort-
+   by option, e.g.
+   
+   filter_macs_peaks.py -f "pvalue<=1e-9" --sort-by=pvalue <MACS peak file>
+   
+   sorts peaks with a pvalue smaller than 1e-9, smallest pvalue peaks first.
+   All fields are sorted ascending by default.  Output is prepended with comments
+   describing what the file contains, i.e. which filters are applied, how many
+   records there are, etc.
+   
+   Note: MACS -10*log10(pvalue) values are converted to normal pvalues
+   
+   
+   Options:
+     -h, --help            show this help message and exit
+     -f FILTERS, --filter=FILTERS
+                           add filter expression
+     --sort-by=SORT_BY     comma delimited list of features to sort by, filtered
+                           peaks are not sorted by default, if provided peaks are
+                           sorted ascending by default
+     --sort-dir=SORT_DIR   direction to sort [default: ASCEND]
+     --top=TOP             accepts an integer, output at most this many peaks
+                           [default: all]
+     --output=OUTPUT       filename to output filtered peaks to [default: stdout]
+     --encode-filters      write out records to a file <MACS peaks
+                           file>_<filters>.xls (incompatible with --output
+                           option)
+     --summary             only print out summary information for the filter
+     --no-header           do not print out header or metadata info
+     --shuffle             shuffle order of filtered records, useful for
+                           selecting random peaks
+     --print-encoded-fn    print out the filename that would be created by
+                           --encode-filters
+      
+
+:ref:`top <top>`
+
+.. _filter_gps_peaks.py:
+
+filter_gps_peaks.py::
+
+   Usage: filter_gps_peaks.py [options] <GPS peak file>
+   
+   Filter GPS peaks by supplied criteria.  Available filter features are:
+   
+   IP
+   Control
+   Fold
+   qvalue
+   pvalue
+   IPvsEMP
+   IPvsCTR
+   
+   Filters are provided as expressions using the [-f |--filter] option, e.g. the
+   command
+   
+   filter_gps_peaks.py -f "IP>100" --filter="pvalue<=1e-9" <GPS peak file>
+   
+   finds only peaks with more than 100 tags and a pvalue of less than 1e-9.  Any
+   number of filters may be provided, and only peaks that match *all* filters
+   pass. User is warned if filters result in zero results.  Only inequality
+   operators are valid.  Invoking with no filter arguments returns all peaks.  To
+   sort, use the --sort-by option, e.g.
+   
+   filter_gps_peaks.py -f "pvalue<=1e-9" --sort-by=pvalue <GPS peak file>
+   
+   sorts peaks with a pvalue smaller than 1e-9, smallest pvalue peaks first.
+   All fields are sorted ascending by default.  Output is prepended with comments
+   describing what the file contains, i.e. which filters are applied, how many
+   records there are, etc.
+   
+   Note: GPS P_-log10 and Q_-log10 values are converted to normal pvalues and
+   qvalues
+   
+   
+   Options:
+     -h, --help            show this help message and exit
+     -f FILTERS, --filter=FILTERS
+                           add filter expression
+     --sort-by=SORT_BY     comma delimited list of features to sort by, filtered
+                           peaks are not sorted by default, if provided peaks are
+                           sorted ascending by default
+     --sort-dir=SORT_DIR   direction to sort [default: ASCEND]
+     --top=TOP             accepts an integer, output at most this many peaks
+                           [default: all]
+     --output=OUTPUT       filename to output filtered peaks to [default: stdout]
+     --encode-filters      write out records to a file <GPS peaks
+                           file>_<filters>.xls (incompatible with --output
+                           option)
+     --summary             only print out summary information for the filter
+     --no-header           do not print out header or metadata info
+     --shuffle             shuffle order of filtered records, useful for
+                           selecting random peaks
+     --print-encoded-fn    print out the filename that would be created by
+                           --encode-filters
+      
+
+:ref:`top <top>`
+
+.. _filter_mapped_known_genes.py:
+
+filter_mapped_known_genes.py::
+
+   Usage: filter_mapped_known_genes.py [options] <mapped known genes file>
+   
+   Filter columns and rows from *join_mapped_known_genes.py* output which was
+   invoked with *--binary-plus* and *--field-types* flags.  Specify full column
+   names for either binding or expression data with the *--bind-cols* and
+   *--affy-cols* arguments, respectively. The special fieldname *MAPPED* from
+   *join_mapped_known_genes.py* is used to determine whether a file contains a
+   mapping for each gene.  To filter genes by their associated binding or
+   expression data, specify *--bind-filter* or *--affy-filter* as follows:
+   
+     - *any* - report gene if at least one input file maps to the gene
+     - *all* - report gene if every input file maps to the gene
+     - *absent* - report gene if no input file maps to the gene
+     - *none* - do not filter genes at all (default)
+   
+   Results of binding and expression filters are 'and'ed together, e.g.:
+   
+   --bind-filter=all --affy-filter=absent
+   
+   returns only genes for which all binding files and none of the expression
+   files map.
+   
+   
+   Options:
+     -h, --help            show this help message and exit
+     --bind-cols=BIND_COLS
+                           comma delimited list of binding data column names to
+                           include, [default: all]
+     --affy-cols=AFFY_COLS
+                           comma delimited list of expression data column names
+                           to include, [default: all]
+     --bind-filter=BIND_FILT
+                           gene set to include based on binding data [default:
+                           none]
+     --affy-filter=AFFY_FILT
+                           gene set to include based on expression data [default:
+                           none]
+     --output=OUTPUT       write output to file
+   
+   Note: when specifying column names, be sure to escape characters like
+   (,),&,*,etc... that shells interpret with a \, e.g. --bind-
+   cols=-10\*log10\(pvalue\)
+      
+
+:ref:`top <top>`
+
+.. _gerald_stats.py:
+
+gerald_stats.py::
+
+   Usage: gerald_stats.py [options] <filename> [<filename>...]
+   
+   Outputs various stats about the GERALD formatted file(s) input. If multiple
+   files are provided statistics are aggregated according to the specified output
+   format.  Output formats available via --format=X :
+   
+     # *python* - print an eval()'able python dictionary w/ counts
+     # *rst* - print statistics in a reStructured text table (default)
+     # *tab* - print statistics in a tab delimited form w/ header names
+   
+   Except for *python* format, each input file has its own output line.  *python*
+   summarizes all alignments.
+   
+   
+   Options:
+     -h, --help       show this help message and exit
+     --output=OUTPUT  write output to file [default: stdout]
+     --format=FORMAT  format to print out stats [default: rst]
+      
+
+:ref:`top <top>`
+
+.. _gerald_to_bed.py:
+
+gerald_to_bed.py::
+
+   Usage: gerald_to_bed.py [options] <GERALD file> [<GERALD file>...]
+   
+   Convert the GERALD alignment formatted files into BED format.  Input file
+   named <path>/<filename>.<ext> is translated into <path>/<filename>.bed unless
+   --output or --stdout is specified, in which case formatted lines are written
+   to file or standard output, respectively.  If multiple input files are
+   supplied with the --output or --stdout option all formatted lines are
+   concatenated together. Formatting only occurs for GERALD input lines that have
+   a valid Match Position field (i.e. successfully aligned somewhere).
+   
+   Options:
+     -h, --help            show this help message and exit
+     --output=OUTPUT       write all records to file
+     --stdout              write out all formatted lines to stdout
+     --min-fields          only format the first three fields
+     --pass-only           only format lines with Y in the Pass Filtering field
+     --chromo-strip=CHROMO_STRIP
+                           pattern to remove from chromo field in BED output
+                           (e.g. --chromo-strip=.fa to remove .fa from chrX.fa)
+                           [default: .fa]
+      
+
+:ref:`top <top>`
+
+.. _join_mapped_known_genes.py:
+
+join_mapped_known_genes.py::
+
+   Usage: join_mapped_known_genes.py -b <mapped DNA binding file>|-a <mapped microarray file> [-b <mapped DNA binding file> ...] [-a <mapped microarray file> ...]
+   
+   Join all files on the first column, concatenating records with matching
+   entries onto one line per entry.  Understands DNA binding data as mapped with
+   *map_peaks_to_known_genes.py* utility microarray data as mapped by
+   *probeset_to_known_genes.py* utility, passed to program using *-b* and *-a*
+   options respectively.  If a file contains more than one mapping to a gene
+   additional columns are added. At least one file of either type is required.
+   Field names are written as <filename>.<original field name>.<map number>
+   
+   Options:
+     -h, --help            show this help message and exit
+     -a AFFY_FILE, --affy-file=AFFY_FILE
+                           add a mapped microarray file
+     -b BIND_FILE, --bind-file=BIND_FILE
+                           add a mapped DNA binding file (e.g. MACS, BED)
+     -m MACS_FILE, --macs-file=MACS_FILE
+                           DEPRECATED: use -b instead, add a mapped default MACS
+                           formatted peaks (*.xls) file
+     --output=OUTPUT       file to output joined records to [default: stdout]
+     --first-only          only output the first mapping to a gene from each file
+     --binary              output only one column per file with a 0 or 1 to
+                           indicate whether a mapping exists in that file
+     --binary-plus         output one column per file with a 0 or 1 to indicate
+                           whether a mapping exists in that file in addition to
+                           all other columns
+     --field-types         prepend BIND or AFFY to the beginning of all
+                           appropriate columns
+   
+   Note: microarray files should have been created by Bioconductor, and all
+   files should have a row of field names as the first line
+      
+
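+For example, joining one mapped DNA binding file and one mapped microarray
+file (both file names hypothetical) into a single table::
+
+   $> join_mapped_known_genes.py -b peaks_mapped.xls -a probes_mapped.txt --output=joined.txt
+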
+:ref:`top <top>`
+
+.. _map_intervals.py:
+
+map_intervals.py::
+
+   Usage: map_intervals.py [options] <from> <to>
+   
+   Find records in <to> interval file that map to records in <from> interval
+   file.  Files should be tab delimited and are expected to have a chromosome
+   column, a start column, and an end column.  The indices of these columns can
+   be specified on the command line but by default are the first three columns,
+   respectively.  By default, prints to stdout one newline separated row per
+   row in <from>, paired with the line from <to> where there is a mapping. If
+   no mapping is found (e.g. when specifying a maximum margin to search
+   within) the word None is printed.  By default only the nearest record is
+   printed, with ties settled by smallest line number in <to>.
+   
+   Options:
+     -h, --help            show this help message and exit
+     -w WINDOW, --window=WINDOW
+                           window as <int upstream> <int downstream> to search
+                           for intervals [default: (1000000000.0, 1000000000.0)]
+     -f FROM_IND, --from=FROM_IND
+                           coordinates of chromosome, start, stop in <from> file
+     -i, --skip-from-header
+                           <from> has a header that should be skipped
+     -t TO_IND, --to=TO_IND
+                           coordinates of chromosome, start, stop in <to> file
+     -j, --skip-to-header  <to> has a header that should be skipped
+      
+
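+For example, a sketch of reporting, for each record in a hypothetical
+``peaks.txt``, the nearest record in ``annotations.txt`` within 10kb up- and
+downstream::
+
+   $> map_intervals.py -w 10000 10000 peaks.txt annotations.txt > mapped.txt
+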
+:ref:`top <top>`
+
+.. _map_peaks_to_genes.py:
+
+map_peaks_to_genes.py::
+
+   Usage: map_peaks_to_genes.py [options] <refGene file> <peaks file>
+   
+   Map the peaks in <peaks file> to genes in <refGene file>.  <refGene file>
+   format is as specified in
+   http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/refGene.sql. <peaks
+   file> format is as produced by MACS.
+   
+   Options:
+     -h, --help            show this help message and exit
+     --upstream-window=UPST_WIN
+                           window width in base pairs to consider promoter region
+                           [default: 5500]
+     --downstream-window=DNST_WIN
+                           window width in base pairs to consider downstream
+                           region [default: 2500]
+     --map-output=PEAK_OUTPUT
+                           filename to output mapped peaks in BED format to
+                           [default: stdout]
+     --stats-output=STATS_OUTPUT
+                           filename to output summary stats in conversion
+                           [default: stderr]
+     --peaks-format=PEAKS_FMT
+                           format of peaks input file [default: MACS]
+      
+
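+For example, mapping hypothetical MACS peaks against a refGene annotation
+with a widened 10kb promoter window::
+
+   $> map_peaks_to_genes.py --upstream-window=10000 refGene.txt peaks.xls > mapped_peaks.txt
+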
+:ref:`top <top>`
+
+.. _map_peaks_to_known_genes.py:
+
+map_peaks_to_known_genes.py::
+
+   Usage: map_peaks_to_known_genes.py [options] <knownGene file> <knownGene xRef file> <peaks file>
+   
+   
+   Map the peaks in <peaks file> to genes in <knownGene file>.  <knownGene
+   file> format is as specified in
+   http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/knownGene.sql.
+   <peaks file> format is as produced by MACS.  If *auto* is chosen (default)
+   the file extension is examined: *.xls* for default MACS format or *.bed*
+   for BED format.  If the --detail option is provided, the following extra
+   fields are appended to each row:
+   
+   peak loc, dist from feature, score, map type, map subtype
+   
+   
+   Options:
+     -h, --help            show this help message and exit
+     --upstream-window=UPST_WIN
+                           window width in base pairs to consider promoter region
+                           [default: 5500]
+     --downstream-window=DNST_WIN
+                           window width in base pairs to consider downstream
+                           region [default: 2500]
+     --tss                 calculate downstream window from transcription start
+                           site instead of transcription end site
+     --map-output=PEAK_OUTPUT
+                           filename to output mapped peaks to [default: stdout]
+     --stats-output=STATS_OUTPUT
+                           filename to output summary stats in conversion
+                           [default: stderr]
+     --peaks-format=PEAKS_FMT
+                           format of peaks input file [default: auto]
+     --detail              add extra fields to output, see description
+     --intergenic          write intergenic peaks to the gene file as well with
+                           None as gene ID
+      
+
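+For example, the invocation used in *examples/mapping/map_to_known_gene.sh*,
+with detailed output and 10kb windows calculated from the TSS (annotation
+file names hypothetical)::
+
+   $> map_peaks_to_known_genes.py --detail --tss --upstream-window=10000 --downstream-window=10000 knownGene.txt kgXref.txt test_peaks.xls
+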
+:ref:`top <top>`
+
+.. _motif_scan.py:
+
+motif_scan.py::
+
+   Usage: motif_scan.py [options] <org> <peaks fn> <TAMO motif fn>
+   
+   Scan the sequences of the peaks in <peaks fn> for matches to the motifs
+   in <TAMO motif fn>
+   
+   Options:
+     -h, --help            show this help message and exit
+     -n TOP_N, --top-n=TOP_N
+                           use top n peaks by pvalue for sequence scanning
+                           [default: all]
+     -i MOTIF_IND, --motif-indices=MOTIF_IND
+                           which indices from <TAMO motif fn> to use [default:
+                           all]
+     -d DIR, --dir=DIR     write all results into this directory
+     --fixed-peak-width=FIXED_W
+                           use only a fixed peak window around the summit instead
+                           of whole peak
+      
+
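+For example, scanning the top 1000 peaks by pvalue against a hypothetical
+TAMO motif file, writing all results into a directory::
+
+   $> motif_scan.py -n 1000 -d scan_results mm9 peaks.xls motifs.tamo
+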
+:ref:`top <top>`
+
+.. _nibFrag.py:
+
+nibFrag.py::
+
+   Usage: nibFrag.py [options] file.nib start end strand [outfile]
+     -- or --
+   nibFrag.py [options] --batch file.nib batchfile [batchfile ...]
+   
+   A python implementation of Jim Kent's nibFrag utility that allows outputting
+   to stdout.  Otherwise the functionality is identical for the non-batch usage.
+   Batch mode accepts one or more files containing sets of coordinates to extract
+   from the nib file.  Only BED formatting is accepted at the moment. All
+   sequences are concatenated together in FASTA format.  To retrieve the entire
+   sequence, use END as the end argument.
+   
+   Options:
+     -h, --help            show this help message and exit
+     --no-header           only output sequence (no fasta header)
+     --wrap-width=WRAP_WIDTH
+                           wrap output sequence at this number of bases, 0
+                           indicates no wrap (sequence ends up on single line)
+                           [default: 50]
+     --batch               run in batch mode, interpret arguments after nib file
+                           as queries
+     --batch-format=BATCH_FORMAT
+                           format to interpret batch files [default: BED]
+   
+     Original nibFrag options:
+       --masked            use lower case characters for bases meant to be masked
+                           out
+       --hardMasked        use upper case for non masked-out and 'N' characters
+                           for masked-out bases
+       --upper             use upper case characters for all bases
+       --name=NAME         Use given name after '>' in output sequence
+       --dbHeader=DBHEADER
+                           Add full database info to the header, with or without
+                           -name option
+       --tbaHeader=TBAHEADER
+                           Format header for compatibility with tba, takes
+                           database name as argument
+   
+   Note: When specifying the --name option in batch mode, also specify
+   --dbHeader to ensure unique FASTA headers.
+      
+
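+For example, extracting the first 100 bases of the + strand from a
+hypothetical nib file, then running batch mode over a BED file::
+
+   $> nibFrag.py chr1.nib 0 100 +
+   $> nibFrag.py --batch chr1.nib peaks.bed > peaks.fa
+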
+:ref:`top <top>`
+
+.. _org_settings.py:
+
+org_settings.py::
+
+   Usage: org_settings.py [options] [<org key> [<org setting>]]
+   
+   Tool for retrieving sets of organism-specific settings and paths. Original
+   paths are set at install time, and can be overridden in the file
+   ~/.org_settings.cfg. Allows output of settings in a variety of shell
+   environment syntaxes.  The tool attempts to guess which shell environment
+   is being used by examining the SHELL environment variable unless the
+   syntax is explicitly set.  When run without arguments, returns a listing
+   of all available settings.
+   
+   Options:
+     -h, --help            show this help message and exit
+     -s SYNTAX, --syntax=SYNTAX
+                           syntax flavor of output to produce [default: auto]
+     -l, --list            print all available settings for human consumption
+      
+
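+For example, listing all available settings, then capturing a single path in
+a shell variable (as the example script in *examples/mapping* does)::
+
+   $> org_settings.py -l
+   $> KG_FN=$(org_settings.py mm9 known_gene_anno_path)
+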
+:ref:`top <top>`
+
+.. _peaks_to_fasta.py:
+
+peaks_to_fasta.py::
+
+   Usage: peaks_to_fasta.py [options] <organism> <peak file> [<peak file> ...]
+   
+   Extract sequences for peaks in provided peak file(s).  Can interpret MACS or
+   BED output, determined automatically by .xls or .bed extensions respectively
+   (force explicit format with --peak-format option).  Outputs fasta sequences
+   for the peaks in all files extracted from the reference genome specified by
+   the output of *org_settings.py <organism> genome_dir* to stdout by
+   default.  Chromosome names in peak files must match nib filenames without
+   extension (e.g. peak line: chr1 0 100 searches *genome_dir*/chr1.nib).  Fasta
+   records have the following format:
+   
+   ><chromosome>:<start>-<end>;fn=<name of file>:<line number>;db_fn=<db
+   filename>;fmt=<format>;<source alignment info>
+   <sequence...>
+   
+   <db filename> is the filename where the sequence was extracted, <format> is
+   the format of the input file (MACS or BED), and <source alignment info>
+   contains all the fields from the originating alignment according to the source
+   format.
+   
+   Options:
+     -h, --help            show this help message and exit
+     --min-header          only store <chromosome>:<start>-<end> in header
+     --peak-format=PEAK_FORMAT
+                           peak file format, 'auto' determines format by
+                           extension, choices: MACS, BED, auto [default: auto]
+     --output=OUTPUT       filename to output fasta records to [default: stdout]
+     --fixed-peak-width=FIXED_PEAK_WIDTH
+                           return a fixed number of bases flanking peak summit
+                           (*summit* field in MACS, (end-start)/2 in BED),
+                           ignoring start/stop coords [default: None]
+     --wrap-width=WRAP_WIDTH
+                           wrap fasta sequences to specified width. -1 indicates
+                           no wrap [default: 70]
+      
+
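+For example, extracting a fixed 200 base window around the summit of each
+hypothetical MACS peak for mm9::
+
+   $> peaks_to_fasta.py --fixed-peak-width=200 --output=peaks.fa mm9 peaks.xls
+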
+:ref:`top <top>`
+
+.. _plot_pos_vs_neg_peaks.py:
+
+plot_pos_vs_neg_peaks.py::
+
+   Usage: plot_pos_vs_neg_peaks.py [options] <pos peaks fn> <neg peaks fn>
+   
+   Options:
+     -h, --help            show this help message and exit
+     -o OUT_FN, --output=OUT_FN
+                           filename of output image
+      
+
+:ref:`top <top>`
+
+.. _plot_peak_loc_dist.py:
+
+plot_peak_loc_dist.py::
+
+   Usage: plot_peak_loc_dist.py [options] <peaks fn> <gene list fn>
+   
+   Produce a pie chart of the locations of peaks in different bins (promoter,
+   gene, exon, intron, etc.) and, optionally, save the different records to their
+   own files for subsequent analysis.  Also produce a histogram of distance from
+   feature values in the mapping file. The peaks file is expected to be as
+   output by MACS, or alternatively a BED file, in which case the -b plot is
+   not available.  The gene list file is expected to be in the format output
+   by the peaks_to_known_genes.py script.
+   
+   Options:
+     -h, --help            show this help message and exit
+     -b BAR_FN, --bar-fn=BAR_FN
+                           filename for pvalue stacked bar chart
+     -g GENE_PIE_FN, --gene-pie-fn=GENE_PIE_FN
+                           filename for pie chart image
+     -p PEAK_PIE_FN, --peak-pie-fn=PEAK_PIE_FN
+                           filename for pie chart image
+     -f DIST_FN, --dist-fn=DIST_FN
+                           filename for distance from feature image
+     -s, --save            write out files containing peaks for each category
+     -d OUT_DIR, --output-dir=OUT_DIR
+                           output files created by --save option to this
+                           directory
+     --no-plot             don't show (but still save) the figure produced
+     --peaks-format=PEAK_FMT
+                           format of peaks file, either MACS or BED [default:
+                           MACS]
+      
+
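+For example, producing the pie charts and per-category peak files for a
+hypothetical MACS run without opening an interactive window::
+
+   $> plot_peak_loc_dist.py --no-plot -s -d plots -g gene_pie.png -p peak_pie.png peaks.xls peaks_genes.txt
+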
+:ref:`top <top>`
+
+.. _rejection_sample_fasta.py:
+
+rejection_sample_fasta.py::
+
+   Usage: rejection_sample_fasta.py [options] <organism> <fasta file> [<fasta file> ... ]
+   
+   Use rejection sampling to generate a set of background/random sequences
+   matching the distance to nearest transcription start site, sequence
+   length, and GC content distributions of the input fasta file(s).
+   Generated sequences are genomic sequences sampled based on these
+   distributions. All sequences from all files are used to generate the
+   background sequences. The following commands must output a path to a nib
+   genomic sequence directory and refGene annotation, respectively:
+   
+   $> org_settings.py <organism> genome_dir
+   $> org_settings.py <organism> refgene_anno_path
+   
+   The utility prints generated fasta records to stdout by default.  Input
+   sequences from chr20 are mapped to chrX, chr21 are mapped to chrY, and
+   sequences from chrM are not used.
+   
+   
+   Options:
+     -h, --help            show this help message and exit
+     -n NUM_SEQS, --num-seqs=NUM_SEQS
+                           number of sequences to generate, either absolute
+                           number or factor of # input sequences, e.g. 2.5x for
+                           2.5 times the # of input sequences [default: 1x]
+     --output=OUTPUT       file to output fasta records to [default: stdout]
+     --bed                 also produce a BED formatted file representing sampled
+                           sequences
+     --bed-output=BED_OUTPUT
+                           with --bed, file to output BED records to [default:
+                           output.bed]
+     -v, --verbose         print out debug information
+      
+
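+For example, generating 2.5 times as many background sequences as a
+hypothetical input fasta contains, along with a BED file of the sampled
+regions::
+
+   $> rejection_sample_fasta.py -n 2.5x --bed --bed-output=bg.bed mm9 peaks.fa > bg.fa
+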
+:ref:`top <top>`
+
+.. _sort_bed.py:
+
+sort_bed.py::
+
+   Usage: sort_bed.py [options] <BED file> [<BED file> <BED file>...]
+   
+   Sort the BED formatted files first by chromosome (field 1) and then by start
+   coordinate (field 2).  Lines from all files submitted are concatenated and
+   sorted in the final output.
+   
+   Options:
+     -h, --help       show this help message and exit
+     --output=OUTPUT  filename to write the sorted BED lines [default: stdout]
+      
+
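+For example, concatenating and sorting two hypothetical BED files::
+
+   $> sort_bed.py --output=sorted.bed peaks_1.bed peaks_2.bed
+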
+:ref:`top <top>`
+
+.. _wait_for_jobid.py:
+
+wait_for_jobid.py::
+
+   Usage: wait_for_jobid.py [options] <job id> [<job id>...]
+   
+   Poll qstat and wait until all <job id>s are finished
+   
+   Options:
+     -h, --help  show this help message and exit
+      
+
+:ref:`top <top>`
+
+.. _wqsub.py:
+
+wqsub.py::
+
+   Usage: wqsub.py [options] command
+   
+   Wrap the specified command into a qsub script and submit it for execution.
+   The script captures both stdout and stderr to the current directory. By
+   default, all of the user's environment variables are put into the script
+   (currently compatible with SGE only).
+   
+   Options:
+     -h, --help            show this help message and exit
+     --wqsub-name=WQSUB_NAME
+                           job name to submit as <--wqsub-name>_<first non-
+                           whitespace chars in command> [default: wqsub]
+     --wqsub-ext=WQSUB_EXT
+                           file extension to use for stdout files
+     --wqsub-keep-script   do not delete qsub script generated after job
+                           submission
+     --wqsub-no-env        do not include any local environment variables in the
+                           script
+     --wqsub-no-submit     create script but do not submit job (useful for
+                           generating scripts)
+     --wqsub-drm=DRM       the DRM to generate scripts for [default: SGE]
+     --wqsub-drm-arg=DRM_ARGS
+                           arguments to pass as parameters in the job script
+                           specific to the DRM, use multiple option flags to
+                           specify multiple parameters
+     --wqsub-wait          poll the DRM and do not return control until job is
+                           finished (only works for TORQUE)
+   
+   Note: this script only works in Unix-style environments.
+      
+
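+For example, wrapping a hypothetical command for TORQUE and blocking until
+the job finishes::
+
+   $> wqsub.py --wqsub-drm=TORQUE --wqsub-wait gerald_stats.py s_1_export.txt
+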
+:ref:`top <top>`
+
+.. _wqsub_drmaa.py:
+
+wqsub_drmaa.py::
+
+   (no usage text available: generating this help requires the python
+   *drmaa* module, which was not installed when these docs were built)
+   
+
+:ref:`top <top>`
+
+
+
Binary file chipsequtil-master/examples/._mapping has changed
Binary file chipsequtil-master/examples/._nib has changed
Binary file chipsequtil-master/examples/._seq has changed
Binary file chipsequtil-master/examples/mapping/._map_to_known_gene.sh has changed
Binary file chipsequtil-master/examples/mapping/._test_peaks.xls has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/examples/mapping/map_to_known_gene.sh	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+# Usage: map_peaks_to_known_genes.py [options] <knownGene file> <knownGene xRef file> <peaks file>
+#
+#
+# Map the peaks in <peaks file> to genes in <knownGene file>.  <knownGene file>
+# format is as specified in
+# http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/knownGene.sql.
+# <peaks file> format is as produced by MACS.  If *auto* is chosen (default)
+# the file extension is examined: *.xls* for default MACS format or *.bed*
+# for BED format.  If the --detail option is provided, the following extra
+# fields are appended to each row:
+#
+# peak loc, dist from feature, score, map type, map subtype
+#
+#
+# Options:
+#   -h, --help            show this help message and exit
+#   --upstream-window=UPST_WIN
+#                         window width in base pairs to consider promoter region
+#                         [default: 5500]
+#   --downstream-window=DNST_WIN
+#                         window width in base pairs to consider downstream
+#                         region [default: 2500]
+#   --tss                 calculate downstream window from transcription start
+#                         site instead of transcription end site
+#   --map-output=PEAK_OUTPUT
+#                         filename to output mapped peaks to [default: stdout]
+#   --stats-output=STATS_OUTPUT
+#                         filename to output summary stats in conversion
+#                         [default: stderr]
+#   --peaks-format=PEAKS_FMT
+#                         format of peaks input file [default: auto]
+#   --detail              add extra fields to output, see description
+
+ORG=mm9
+KG_FN=$(org_settings.py $ORG known_gene_anno_path)
+XREF_FN=$(org_settings.py $ORG known_gene_xref_path)
+OPTS="--detail --tss --upstream-window=10000 --downstream-window=10000"
+PEAKS_FN=test_peaks.xls
+
+echo map_peaks_to_known_genes.py $OPTS $KG_FN $XREF_FN $PEAKS_FN
+map_peaks_to_known_genes.py $OPTS $KG_FN $XREF_FN $PEAKS_FN
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/examples/mapping/test_peaks.xls	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,21 @@
+# genes:
+# uc007aet.1	chr1	-	3195984	3205713	3195984	3195984	2	3195984,3203519,	3197398,3205713,		uc007aet.1
+# uc008wgw.1	chr5	+	3522764	3525260	3522764	3522764	1	3522764,	3525260,		uc008wgw.1
+#
+# chr5	3522663	3522664	1	0	1	0	0	1 - promoter
+# chr5	3522863	3522864	1	0	1	0	0	1 - in gene
+# chr5	3532563	3532564	1	0	1	0	0	1 - in downstream
+# chr1	3205814	3205815	1	0	1	0	0	1 - promoter
+# chr1	3205614	3205615	1	0	1	0	0	1 - in gene
+# chr1	3195913	3195914	1	0	1	0	0	1 - in downstream
+# chr1	319588	319588	1	0	1	0	0	1 - unmapped
+#
+# chr1 is - strand, chr5 + strand, assumes 10k window around TSS
+chr	start	end	length	summit	tags	-10*log10(pvalue)	fold_enrichment	FDR(%)
+chr5	3522663	3522664	1	0	1	0	0	1
+chr5	3522863	3522864	1	0	1	0	0	1
+chr5	3532564	3532565	1	0	1	0	0	1
+chr1	3205814	3205815	1	0	1	0	0	1
+chr1	3205614	3205615	1	0	1	0	0	1
+chr1	3195913	3195914	1	0	1	0	0	1
+chr1	319588	319588	1	0	1	0	0	1
Binary file chipsequtil-master/examples/nib/._shuffled_peaks.bed has changed
Binary file chipsequtil-master/examples/nib/._test_batch_fasta.py has changed
Binary file chipsequtil-master/examples/nib/._test_nib_db.py has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/examples/nib/shuffled_peaks.bed	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,1000 @@
+chr19	29505473	29505892	MACS_peak_4348	103.85
+chr5	23950711	23951266	MACS_peak_6268	83.33
+chr1	75303135	75303785	MACS_peak_206	88.17
+chr3	105611391	105612033	MACS_peak_5420	56.03
+chr4	140654843	140655635	MACS_peak_6105	178.49
+chr2	37590398	37590707	MACS_peak_4677	75.45
+chr1	107761995	107762362	MACS_peak_312	96.07
+chr3	153387629	153388143	MACS_peak_5657	52.58
+chr11	88165911	88166520	MACS_peak_1474	62.73
+chr11	109512132	109512551	MACS_peak_1616	128.82
+chr18	57085271	57085755	MACS_peak_4115	107.73
+chr13	96661232	96661599	MACS_peak_2313	62.56
+chr3	95164133	95164494	MACS_peak_5342	93.42
+chr3	107434353	107434982	MACS_peak_5438	65.35
+chr11	6525702	6526208	MACS_peak_1057	56.89
+chr17	71137869	71138311	MACS_peak_3922	65.19
+chr5	120915880	120916171	MACS_peak_6566	100.90
+chr14	115241544	115242039	MACS_peak_2840	66.36
+chr3	115548096	115548809	MACS_peak_5466	146.81
+chr3	143368788	143369115	MACS_peak_5597	63.16
+chr12	73861752	73862246	MACS_peak_1870	80.18
+chr4	83619188	83619568	MACS_peak_5815	52.20
+chr7	80763465	80763988	MACS_peak_7410	71.38
+chr11	78816343	78817112	MACS_peak_1360	53.58
+chr10	80160393	80161035	MACS_peak_822	294.44
+chr13	32893584	32894176	MACS_peak_2117	81.21
+chr10	78218410	78218726	MACS_peak_790	64.14
+chr11	58907018	58907334	MACS_peak_1205	98.43
+chr3	104162680	104163086	MACS_peak_5410	55.17
+chr6	39156271	39156786	MACS_peak_6854	61.68
+chr18	85020575	85021002	MACS_peak_4215	74.27
+chr6	72166566	72167067	MACS_peak_6931	69.03
+chr17	56748737	56749331	MACS_peak_3884	106.79
+chr2	57090575	57091032	MACS_peak_4713	76.38
+chr6	52662598	52663126	MACS_peak_6888	97.50
+chr5	88982859	88983700	MACS_peak_6425	295.50
+chr5	134967688	134968192	MACS_peak_6645	72.85
+chr17	29089160	29089657	MACS_peak_3724	82.93
+chr8	123062177	123062589	MACS_peak_8088	58.85
+chr11	85534180	85534673	MACS_peak_1423	87.33
+chr15	66990142	66990609	MACS_peak_3114	118.53
+chr8	106966580	106967082	MACS_peak_7997	113.60
+chr11	106888391	106889001	MACS_peak_1583	69.90
+chr19	11848049	11848520	MACS_peak_4306	51.63
+chr15	8584865	8585230	MACS_peak_2922	62.73
+chr17	87913100	87913467	MACS_peak_3983	114.07
+chr13	34254496	34254848	MACS_peak_2122	67.47
+chr1	59914119	59914399	MACS_peak_135	57.79
+chr4	140629745	140629986	MACS_peak_6102	81.30
+chr2	180446822	180447260	MACS_peak_5086	99.29
+chr2	29804429	29804860	MACS_peak_4600	65.92
+chr12	32992278	32992842	MACS_peak_1783	84.01
+chr14	99698259	99698564	MACS_peak_2803	84.57
+chr19	3832712	3833378	MACS_peak_4224	118.71
+chr15	100536597	100537082	MACS_peak_3300	154.87
+chr7	109390646	109391459	MACS_peak_7527	161.60
+chr7	151692825	151693219	MACS_peak_7719	66.56
+chr14	52639405	52639860	MACS_peak_2557	52.74
+chr1	158257693	158258023	MACS_peak_461	64.88
+chr12	76836098	76836626	MACS_peak_1878	62.73
+chr1	182998458	182998880	MACS_peak_570	52.74
+chr2	51797359	51797797	MACS_peak_4703	65.46
+chr8	96707068	96707513	MACS_peak_7960	104.94
+chr3	28143185	28143670	MACS_peak_5131	101.35
+chr6	88889418	88889830	MACS_peak_7010	52.74
+chr2	131937255	131937594	MACS_peak_4912	72.89
+chr7	25688982	25689460	MACS_peak_7246	62.73
+chr19	46938054	46938331	MACS_peak_4436	92.02
+chr7	138515654	138516191	MACS_peak_7671	84.15
+chr14	29767339	29767710	MACS_peak_2466	51.44
+chr15	86002731	86003183	MACS_peak_3240	72.40
+chr15	103088442	103089223	MACS_peak_3322	883.55
+chr19	33127653	33128234	MACS_peak_4366	116.11
+chr5	135450040	135450529	MACS_peak_6650	101.01
+chr15	51080445	51080929	MACS_peak_3050	62.73
+chr9	124009677	124010094	MACS_peak_8582	65.09
+chr1	107856029	107856432	MACS_peak_313	52.07
+chr10	107555226	107555677	MACS_peak_929	79.40
+chr7	55762430	55762866	MACS_peak_7364	91.92
+chr12	96882121	96882495	MACS_peak_1959	70.03
+chr3	68480776	68481485	MACS_peak_5235	78.55
+chr1	89537056	89537406	MACS_peak_259	53.07
+chr14	27335329	27335792	MACS_peak_2450	52.74
+chr17	56949680	56949993	MACS_peak_3889	81.91
+chr5	118928141	118928605	MACS_peak_6556	117.21
+chr8	84911554	84912100	MACS_peak_7907	71.83
+chr8	129108351	129108844	MACS_peak_8142	54.79
+chr3	78877870	78878229	MACS_peak_5251	71.22
+chr19	18650375	18650861	MACS_peak_4324	62.73
+chr6	87942729	87943305	MACS_peak_6992	81.55
+chr12	92821124	92821370	MACS_peak_1955	69.53
+chr11	18065187	18065398	MACS_peak_1077	97.88
+chr17	84515588	84516165	MACS_peak_3966	458.83
+chr9	92169110	92169873	MACS_peak_8447	108.56
+chr14	14920422	14920757	MACS_peak_2398	123.05
+chr9	34798448	34798810	MACS_peak_8223	70.98
+chr3	94306130	94306466	MACS_peak_5319	95.95
+chr5	115790919	115791717	MACS_peak_6543	254.81
+chr11	68780920	68781624	MACS_peak_1249	96.66
+chr1	55084208	55084643	MACS_peak_101	56.99
+chr11	115938781	115939242	MACS_peak_1655	106.79
+chr7	134851363	134852112	MACS_peak_7651	388.87
+chr2	25413082	25413751	MACS_peak_4557	108.33
+chr9	70760521	70761198	MACS_peak_8400	125.82
+chr1	132526233	132526605	MACS_peak_367	51.37
+chr12	77462231	77462609	MACS_peak_1880	61.61
+chr2	131322118	131322495	MACS_peak_4905	173.69
+chr12	8886534	8886943	MACS_peak_1732	62.46
+chr1	134921392	134922134	MACS_peak_388	97.78
+chr12	50546587	50546853	MACS_peak_1811	72.51
+chr16	44347497	44348102	MACS_peak_3445	67.73
+chr16	91448123	91448772	MACS_peak_3510	110.35
+chr8	96932624	96932949	MACS_peak_7968	67.46
+chr9	50409776	50410148	MACS_peak_8274	68.39
+chr15	39018860	39019403	MACS_peak_3023	96.24
+chrX	7548382	7548918	MACS_peak_8587	182.65
+chr1	36568547	36568801	MACS_peak_47	57.79
+chr3	133241295	133241605	MACS_peak_5543	56.48
+chr3	36470919	36471238	MACS_peak_5148	54.12
+chr5	137974253	137974619	MACS_peak_6683	59.42
+chr4	107278613	107279232	MACS_peak_5866	117.82
+chr8	3621220	3621676	MACS_peak_7722	76.85
+chr11	68792865	68793384	MACS_peak_1250	61.41
+chr11	107283838	107284259	MACS_peak_1593	62.31
+chr17	36162344	36162790	MACS_peak_3801	77.75
+chr2	119176647	119177021	MACS_peak_4841	59.32
+chr14	75947689	75947989	MACS_peak_2746	115.64
+chr2	32837666	32838081	MACS_peak_4650	56.37
+chr5	21772275	21772751	MACS_peak_6260	88.64
+chr4	88181586	88181956	MACS_peak_5819	83.97
+chr17	46210576	46211375	MACS_peak_3824	152.68
+chr8	113290700	113290975	MACS_peak_8033	68.02
+chr14	100246709	100247166	MACS_peak_2804	114.56
+chr18	21097256	21097529	MACS_peak_4028	188.09
+chr15	58175270	58175626	MACS_peak_3078	52.59
+chr9	61513942	61514355	MACS_peak_8334	216.69
+chr10	92184761	92185425	MACS_peak_881	113.68
+chr2	125450541	125451011	MACS_peak_4863	84.01
+chr7	120579702	120580147	MACS_peak_7571	84.01
+chr17	28313728	28314505	MACS_peak_3710	147.88
+chr17	85092137	85092578	MACS_peak_3972	60.05
+chr7	52391580	52392059	MACS_peak_7336	71.31
+chr4	106607491	106607860	MACS_peak_5861	62.73
+chr15	76531134	76532498	MACS_peak_3158	205.05
+chr12	86815403	86815709	MACS_peak_1937	55.62
+chr8	97381250	97381634	MACS_peak_7975	67.56
+chr2	18892130	18892531	MACS_peak_4517	53.98
+chr13	93362690	93363352	MACS_peak_2290	156.01
+chr4	134276344	134276744	MACS_peak_6023	66.29
+chr5	136189308	136189833	MACS_peak_6660	92.87
+chr13	54712548	54712992	MACS_peak_2192	70.78
+chr3	95116459	95117202	MACS_peak_5338	276.81
+chr15	55668280	55668565	MACS_peak_3068	57.79
+chr7	86508145	86508581	MACS_peak_7430	65.59
+chr13	64134767	64135424	MACS_peak_2229	84.01
+chr14	75405717	75405947	MACS_peak_2740	56.48
+chr2	34655577	34655906	MACS_peak_4662	86.22
+chr2	178420601	178420979	MACS_peak_5071	60.99
+chr7	80675775	80676079	MACS_peak_7406	57.04
+chr6	120314001	120314656	MACS_peak_7092	155.03
+chr11	103889450	103889863	MACS_peak_1547	105.51
+chr1	75209595	75210147	MACS_peak_201	195.59
+chr4	136209837	136210242	MACS_peak_6063	91.62
+chr19	38298472	38299109	MACS_peak_4384	52.09
+chr3	146318049	146318677	MACS_peak_5622	65.43
+chr8	97525645	97526124	MACS_peak_7981	83.75
+chr6	42299260	42299977	MACS_peak_6864	156.01
+chr13	95746101	95746664	MACS_peak_2305	118.67
+chr5	68262648	68262928	MACS_peak_6374	76.86
+chr9	4309901	4310202	MACS_peak_8156	57.32
+chr2	130455636	130455898	MACS_peak_4896	68.02
+chr7	133920084	133920580	MACS_peak_7627	94.25
+chr3	144712794	144713309	MACS_peak_5603	333.24
+chr4	41492809	41493178	MACS_peak_5745	61.67
+chr6	83725731	83726256	MACS_peak_6965	72.37
+chr14	123928421	123928771	MACS_peak_2892	53.07
+chr11	94409579	94409974	MACS_peak_1489	68.44
+chr2	165618765	165619347	MACS_peak_5039	77.35
+chr1	97210080	97210414	MACS_peak_302	73.84
+chr19	31412009	31412328	MACS_peak_4353	67.09
+chr7	146028031	146028398	MACS_peak_7696	57.49
+chr14	98617003	98617302	MACS_peak_2799	57.51
+chr19	44406048	44406439	MACS_peak_4413	66.95
+chr14	26681413	26681976	MACS_peak_2449	117.79
+chr2	128037989	128038430	MACS_peak_4878	52.74
+chr17	61434287	61434641	MACS_peak_3905	62.86
+chr15	36390225	36390517	MACS_peak_2989	66.98
+chr14	27398759	27399655	MACS_peak_2452	361.52
+chr11	116115836	116116290	MACS_peak_1661	77.03
+chr15	36579667	36580306	MACS_peak_2996	51.96
+chr1	57568835	57569128	MACS_peak_112	60.79
+chr15	67474872	67475357	MACS_peak_3123	158.15
+chr10	19428365	19428826	MACS_peak_632	89.84
+chr14	113392921	113393120	MACS_peak_2836	66.98
+chr15	38448807	38449350	MACS_peak_3019	59.81
+chr14	20991935	20992435	MACS_peak_2406	75.91
+chr6	134006321	134006678	MACS_peak_7142	71.38
+chr12	112127235	112127724	MACS_peak_2013	80.19
+chr14	76244671	76245541	MACS_peak_2752	107.12
+chr11	104164505	104164874	MACS_peak_1549	69.13
+chr7	134536698	134537132	MACS_peak_7646	78.65
+chr1	137867871	137868260	MACS_peak_415	143.30
+chr18	34665859	34666370	MACS_peak_4058	61.96
+chr1	129101475	129101945	MACS_peak_348	77.88
+chr11	72295448	72295925	MACS_peak_1293	156.01
+chr17	24591995	24592521	MACS_peak_3651	114.96
+chr15	3945339	3946408	MACS_peak_2896	271.22
+chr8	122250900	122251332	MACS_peak_8064	71.78
+chr11	115938158	115938571	MACS_peak_1654	58.78
+chr9	114597610	114598135	MACS_peak_8535	92.87
+chr6	43207256	43207620	MACS_peak_6869	70.82
+chr3	152935129	152935658	MACS_peak_5650	60.73
+chr3	94655429	94656010	MACS_peak_5324	210.22
+chr9	57368841	57369352	MACS_peak_8313	53.61
+chr4	3157974	3158349	MACS_peak_5679	52.74
+chr11	107211666	107212176	MACS_peak_1586	89.67
+chr15	42269449	42270170	MACS_peak_3035	131.90
+chr9	70682529	70683032	MACS_peak_8396	186.94
+chr8	27125446	27126059	MACS_peak_7778	102.67
+chr9	20896025	20896479	MACS_peak_8195	67.10
+chr15	75551370	75551790	MACS_peak_3136	66.67
+chr15	55028995	55029425	MACS_peak_3064	90.94
+chr16	18308240	18308586	MACS_peak_3350	58.96
+chr3	93353745	93354375	MACS_peak_5318	103.23
+chr16	23107242	23107924	MACS_peak_3367	113.70
+chr18	36486603	36487009	MACS_peak_4080	53.46
+chr18	5390330	5390807	MACS_peak_4001	113.10
+chr17	56428661	56429186	MACS_peak_3882	118.67
+chr2	18860310	18861083	MACS_peak_4512	84.24
+chr7	97888242	97888576	MACS_peak_7477	57.36
+chr3	21810071	21810487	MACS_peak_5121	118.67
+chr17	78181904	78182525	MACS_peak_3946	77.05
+chr14	56197450	56198063	MACS_peak_2598	129.98
+chr9	99140804	99141128	MACS_peak_8467	58.22
+chr10	92623323	92623821	MACS_peak_885	100.15
+chr4	140616351	140617131	MACS_peak_6099	80.20
+chr10	61142776	61143539	MACS_peak_744	80.20
+chr7	104485058	104485742	MACS_peak_7488	317.41
+chr11	115939476	115940007	MACS_peak_1656	92.32
+chr10	94580987	94581311	MACS_peak_903	56.69
+chr15	76157364	76157952	MACS_peak_3152	125.64
+chr13	14155415	14155855	MACS_peak_2065	52.91
+chr15	67066485	67066934	MACS_peak_3117	84.01
+chr7	29227640	29228147	MACS_peak_7277	73.17
+chr13	6514405	6514820	MACS_peak_2047	104.32
+chr4	140542557	140543005	MACS_peak_6097	144.88
+chr5	111937855	111938599	MACS_peak_6514	128.49
+chr16	44018427	44018767	MACS_peak_3442	64.02
+chr1	133421664	133422047	MACS_peak_377	82.81
+chrX	166419443	166419942	MACS_peak_8678	54.41
+chr15	93105701	93105937	MACS_peak_3251	154.40
+chr1	108780375	108780748	MACS_peak_320	57.56
+chr11	84636850	84637366	MACS_peak_1410	80.20
+chr17	24995915	24996584	MACS_peak_3656	135.78
+chr14	58033892	58034211	MACS_peak_2613	58.66
+chr13	29847874	29848368	MACS_peak_2108	158.86
+chr1	13520675	13521060	MACS_peak_11	108.01
+chr2	156137538	156137972	MACS_peak_4990	78.65
+chr8	87550632	87550994	MACS_peak_7941	66.66
+chr3	151768385	151768678	MACS_peak_5634	56.48
+chr3	108012888	108013451	MACS_peak_5443	78.80
+chr13	44597050	44597814	MACS_peak_2154	202.82
+chr2	31917741	31918033	MACS_peak_4624	91.45
+chr3	132521750	132522383	MACS_peak_5537	143.48
+chr12	4879663	4880069	MACS_peak_1724	78.35
+chr6	91628640	91629356	MACS_peak_7022	67.31
+chr3	81433756	81434158	MACS_peak_5257	67.93
+chr7	54138715	54139193	MACS_peak_7359	128.73
+chr5	137102584	137103013	MACS_peak_6672	81.24
+chr8	59967224	59967628	MACS_peak_7830	62.73
+chr14	73689765	73690147	MACS_peak_2729	76.26
+chr11	117671467	117671893	MACS_peak_1678	66.26
+chr1	133214967	133215419	MACS_peak_376	174.60
+chr15	72853276	72853636	MACS_peak_3127	52.74
+chr11	109334214	109334929	MACS_peak_1611	51.65
+chrX	45266253	45266899	MACS_peak_8617	128.35
+chr2	131877465	131877919	MACS_peak_4910	132.70
+chr9	20779965	20780304	MACS_peak_8192	60.28
+chr3	90068955	90069393	MACS_peak_5310	78.47
+chr5	76187734	76188295	MACS_peak_6404	65.35
+chr11	104180396	104181197	MACS_peak_1550	84.55
+chr9	43839155	43839734	MACS_peak_8243	170.66
+chr15	85812555	85813334	MACS_peak_3239	124.98
+chr16	30691946	30692407	MACS_peak_3394	226.47
+chr2	110401236	110401587	MACS_peak_4817	63.11
+chr5	125914300	125914711	MACS_peak_6623	65.51
+chr2	166483417	166483707	MACS_peak_5047	52.10
+chr8	60131046	60131454	MACS_peak_7833	52.74
+chr1	153024254	153024901	MACS_peak_439	66.68
+chr6	135133407	135133856	MACS_peak_7151	66.57
+chr7	82993032	82993383	MACS_peak_7423	52.99
+chr12	36728733	36729377	MACS_peak_1795	106.79
+chr19	54161870	54162283	MACS_peak_4440	93.96
+chr13	21366775	21367132	MACS_peak_2077	58.81
+chr7	140828409	140828831	MACS_peak_7684	68.81
+chr7	52771782	52772179	MACS_peak_7344	57.63
+chr11	57258571	57259095	MACS_peak_1191	98.14
+chr10	19855329	19855821	MACS_peak_636	125.55
+chr9	48594723	48595281	MACS_peak_8268	79.19
+chr4	41278073	41278681	MACS_peak_5744	81.51
+chr18	44988493	44988911	MACS_peak_4095	69.91
+chr1	74438395	74439179	MACS_peak_195	162.82
+chr3	108830511	108830918	MACS_peak_5453	62.62
+chr13	96427044	96427529	MACS_peak_2310	152.26
+chr1	142384049	142384472	MACS_peak_423	79.51
+chr1	179064649	179064883	MACS_peak_543	74.87
+chr3	105490131	105490555	MACS_peak_5418	63.69
+chr2	90508129	90508418	MACS_peak_4780	76.86
+chr15	81846602	81847127	MACS_peak_3217	117.96
+chr18	3270592	3271094	MACS_peak_3989	195.43
+chr1	108606863	108607335	MACS_peak_318	95.47
+chr13	75935312	75935640	MACS_peak_2250	63.30
+chr16	30789953	30790403	MACS_peak_3396	148.02
+chr10	111409491	111409958	MACS_peak_950	131.28
+chr9	40880928	40881362	MACS_peak_8236	65.72
+chr8	123191898	123192493	MACS_peak_8089	118.67
+chr12	86713029	86713482	MACS_peak_1934	95.23
+chr18	65281748	65282564	MACS_peak_4150	161.32
+chr9	37296593	37297143	MACS_peak_8230	129.00
+chr18	75530251	75530647	MACS_peak_4189	68.37
+chr14	64162422	64162897	MACS_peak_2650	62.73
+chr10	82222485	82222777	MACS_peak_854	129.73
+chr10	51248911	51249493	MACS_peak_714	104.02
+chr19	45612299	45612910	MACS_peak_4419	89.38
+chr16	59515986	59516330	MACS_peak_3480	72.46
+chr1	37364506	37364915	MACS_peak_55	77.39
+chr9	107436160	107436580	MACS_peak_8495	73.91
+chr6	123239085	123239498	MACS_peak_7098	80.30
+chr8	24145434	24145873	MACS_peak_7767	65.39
+chr17	59064066	59064738	MACS_peak_3903	193.40
+chr18	81626532	81626968	MACS_peak_4206	53.17
+chr8	72498191	72498470	MACS_peak_7850	76.86
+chr2	127033717	127034197	MACS_peak_4869	122.43
+chr3	153427354	153428352	MACS_peak_5658	173.73
+chr13	95777240	95777685	MACS_peak_2306	62.73
+chr6	90654616	90655084	MACS_peak_7016	74.57
+chr6	115545743	115546136	MACS_peak_7072	52.74
+chr7	52392685	52393201	MACS_peak_7337	105.13
+chr1	174445177	174445620	MACS_peak_534	65.13
+chr5	139853354	139853932	MACS_peak_6702	77.65
+chr17	44266175	44266552	MACS_peak_3809	97.39
+chr9	78919711	78920097	MACS_peak_8424	50.34
+chr2	120210305	120210628	MACS_peak_4846	69.96
+chr8	97679869	97680475	MACS_peak_7985	130.84
+chr14	70029196	70029514	MACS_peak_2696	58.75
+chr11	97574402	97574747	MACS_peak_1511	53.47
+chr2	56968627	56969614	MACS_peak_4711	285.57
+chr7	26472954	26473335	MACS_peak_7258	62.73
+chr1	146985918	146986334	MACS_peak_432	62.66
+chr6	30276109	30276518	MACS_peak_6816	73.17
+chr18	4969715	4970163	MACS_peak_3999	62.73
+chr6	85298851	85299333	MACS_peak_6971	130.45
+chr18	62318702	62319054	MACS_peak_4130	55.87
+chr7	97493416	97493783	MACS_peak_7473	95.23
+chr5	84728325	84728797	MACS_peak_6420	65.95
+chr15	96290510	96290960	MACS_peak_3260	75.41
+chr5	64493902	64494502	MACS_peak_6348	155.47
+chr12	70683782	70684144	MACS_peak_1854	74.38
+chr7	28259485	28260176	MACS_peak_7269	157.45
+chr3	102072769	102073100	MACS_peak_5391	64.80
+chr3	121177634	121178278	MACS_peak_5487	124.60
+chr3	141995570	141995959	MACS_peak_5587	74.70
+chr10	12681163	12681522	MACS_peak_617	57.44
+chr7	35770301	35770804	MACS_peak_7310	130.48
+chr3	107901318	107901701	MACS_peak_5442	68.87
+chr4	155406985	155407313	MACS_peak_6229	63.11
+chr14	46277533	46277983	MACS_peak_2523	63.09
+chr7	142790268	142790503	MACS_peak_7693	89.37
+chr9	66360249	66360570	MACS_peak_8377	57.12
+chr15	95621015	95621459	MACS_peak_3254	77.44
+chr4	71861086	71862075	MACS_peak_5807	206.03
+chr11	121065722	121066055	MACS_peak_1707	99.74
+chr19	9041528	9042024	MACS_peak_4289	107.21
+chr8	98477882	98478259	MACS_peak_7992	69.80
+chr18	75722207	75722491	MACS_peak_4193	91.45
+chr15	57812241	57812965	MACS_peak_3074	52.05
+chr3	58917608	58918452	MACS_peak_5209	152.48
+chr4	41660450	41660822	MACS_peak_5747	57.64
+chr11	11641587	11641928	MACS_peak_1066	55.63
+chr8	50911172	50911534	MACS_peak_7818	70.98
+chr11	120209562	120209886	MACS_peak_1697	66.59
+chr14	66971802	66972207	MACS_peak_2681	62.73
+chr3	98621426	98621709	MACS_peak_5379	72.51
+chr12	49775350	49775758	MACS_peak_1809	67.51
+chr12	17040311	17040756	MACS_peak_1752	86.89
+chr14	70465516	70466231	MACS_peak_2709	158.40
+chr4	106926454	106926892	MACS_peak_5863	65.46
+chr11	5221117	5221579	MACS_peak_1043	129.84
+chr11	51762768	51763314	MACS_peak_1170	137.47
+chr12	73948553	73949012	MACS_peak_1872	142.64
+chr15	12123626	12124218	MACS_peak_2933	94.43
+chr15	12246914	12247416	MACS_peak_2937	294.48
+chr2	7924537	7924842	MACS_peak_4478	63.31
+chr16	56916814	56917191	MACS_peak_3470	51.00
+chr14	57190198	57191173	MACS_peak_2608	120.19
+chr5	138011402	138012367	MACS_peak_6684	707.79
+chr1	36153980	36154800	MACS_peak_40	119.59
+chr9	105397273	105397630	MACS_peak_8483	90.84
+chr4	148542288	148542494	MACS_peak_6147	99.30
+chr7	134234472	134235313	MACS_peak_7633	215.08
+chr1	187186557	187186854	MACS_peak_584	84.01
+chr2	156703464	156703925	MACS_peak_5000	135.84
+chr2	45507624	45507896	MACS_peak_4694	66.03
+chr2	25110687	25111472	MACS_peak_4543	265.66
+chr13	23494534	23495087	MACS_peak_2082	74.85
+chr2	118738734	118739174	MACS_peak_4833	58.77
+chrX	11733021	11733752	MACS_peak_8601	84.09
+chr3	153560124	153560559	MACS_peak_5664	53.23
+chr8	97479035	97479520	MACS_peak_7976	156.01
+chr9	114662010	114662635	MACS_peak_8538	65.64
+chr18	56618529	56618905	MACS_peak_4110	56.89
+chr17	34057391	34058028	MACS_peak_3762	56.35
+chr1	99519858	99520254	MACS_peak_306	57.70
+chr4	136194817	136195184	MACS_peak_6060	98.40
+chr7	16611238	16611688	MACS_peak_7204	52.74
+chr1	60215214	60215684	MACS_peak_140	88.89
+chr6	149257575	149258040	MACS_peak_7180	65.32
+chr4	8159311	8159627	MACS_peak_5687	55.94
+chr14	45660604	45661144	MACS_peak_2518	98.84
+chr11	84024342	84024705	MACS_peak_1402	50.69
+chr11	108110784	108111439	MACS_peak_1606	82.28
+chr7	87590346	87590812	MACS_peak_7448	54.95
+chr9	35018443	35018749	MACS_peak_8226	74.08
+chr7	61764305	61764697	MACS_peak_7375	62.07
+chr3	137620670	137621228	MACS_peak_5569	110.82
+chr8	89147603	89148183	MACS_peak_7945	106.99
+chr10	80982282	80982979	MACS_peak_848	148.39
+chr2	113012940	113013326	MACS_peak_4821	56.59
+chr16	93767743	93768080	MACS_peak_3559	109.86
+chr2	4483390	4484698	MACS_peak_4459	128.33
+chr6	128792917	128793800	MACS_peak_7130	123.88
+chr5	148241425	148242026	MACS_peak_6759	97.14
+chr4	34829946	34830380	MACS_peak_5730	65.72
+chr3	37558222	37559100	MACS_peak_5161	173.11
+chr2	90894346	90894793	MACS_peak_4781	72.76
+chr8	107486121	107486440	MACS_peak_7999	81.28
+chr7	140064742	140065140	MACS_peak_7681	73.17
+chr12	30367083	30367515	MACS_peak_1770	73.89
+chrX	11711607	11711970	MACS_peak_8600	84.60
+chr15	5058192	5058833	MACS_peak_2901	89.38
+chr7	104727397	104728070	MACS_peak_7489	70.84
+chr6	133055524	133055892	MACS_peak_7138	62.47
+chr3	95558657	95559026	MACS_peak_5348	79.54
+chr17	35326947	35327306	MACS_peak_3781	66.90
+chr14	52816486	52817050	MACS_peak_2560	64.66
+chr1	87632880	87633301	MACS_peak_233	58.17
+chr9	57495286	57495888	MACS_peak_8318	128.82
+chr11	87571803	87572391	MACS_peak_1454	59.68
+chr4	101511482	101511839	MACS_peak_5847	71.38
+chr15	12251825	12252367	MACS_peak_2938	54.29
+chr8	24276703	24277308	MACS_peak_7770	91.11
+chr6	117981548	117981940	MACS_peak_7086	68.66
+chr7	118300107	118300551	MACS_peak_7564	60.79
+chr5	77553172	77553619	MACS_peak_6415	77.65
+chr7	133428410	133429279	MACS_peak_7615	176.96
+chr5	54386367	54386928	MACS_peak_6343	135.99
+chr2	157967843	157968322	MACS_peak_5015	52.74
+chr1	13579885	13580466	MACS_peak_13	106.79
+chr17	47825794	47826338	MACS_peak_3846	103.13
+chr15	96115848	96116105	MACS_peak_3259	89.28
+chr6	8018474	8018781	MACS_peak_6782	61.94
+chr1	58769938	58770557	MACS_peak_122	106.56
+chr18	13100063	13100479	MACS_peak_4020	101.54
+chr1	95462306	95462739	MACS_peak_289	61.51
+chr13	8456217	8456514	MACS_peak_2052	121.95
+chr8	87426937	87427392	MACS_peak_7932	147.42
+chr3	69488182	69488508	MACS_peak_5241	55.06
+chr5	108495385	108495819	MACS_peak_6502	53.36
+chr7	26391500	26391968	MACS_peak_7253	62.73
+chr14	122222542	122222864	MACS_peak_2879	60.02
+chr7	16880847	16881143	MACS_peak_7213	72.15
+chr10	84379493	84379935	MACS_peak_862	65.19
+chr1	93218296	93218729	MACS_peak_276	67.88
+chr7	134005243	134005813	MACS_peak_7631	156.01
+chr9	25059978	25060419	MACS_peak_8215	65.26
+chr2	4802272	4802882	MACS_peak_4463	90.71
+chr9	114640488	114640918	MACS_peak_8537	63.14
+chr1	155044510	155044840	MACS_peak_453	52.74
+chr2	181598797	181599317	MACS_peak_5098	52.05
+chr16	30227325	30227906	MACS_peak_3389	144.56
+chr2	33582864	33583285	MACS_peak_4654	61.53
+chr2	38920882	38921337	MACS_peak_4683	62.69
+chr12	8639627	8640095	MACS_peak_1730	89.27
+chr1	193244835	193245254	MACS_peak_592	63.53
+chr19	28042093	28042496	MACS_peak_4342	185.71
+chr18	67399653	67399896	MACS_peak_4155	104.20
+chr15	81702453	81702935	MACS_peak_3214	70.29
+chr2	4354267	4354521	MACS_peak_4454	66.03
+chr17	71599086	71599527	MACS_peak_3929	65.92
+chr11	115016216	115016976	MACS_peak_1629	260.88
+chr13	49402730	49403235	MACS_peak_2170	96.40
+chr1	173607566	173608216	MACS_peak_521	161.65
+chr4	149943597	149944833	MACS_peak_6170	714.32
+chr2	30033180	30033596	MACS_peak_4605	60.37
+chr12	73775435	73775826	MACS_peak_1868	193.90
+chr19	6686904	6687279	MACS_peak_4266	106.79
+chr13	94372068	94372377	MACS_peak_2298	52.57
+chr3	134875358	134875676	MACS_peak_5549	74.76
+chr14	35123318	35123817	MACS_peak_2496	91.32
+chr4	134064080	134064462	MACS_peak_6020	65.66
+chr7	38451614	38451982	MACS_peak_7318	51.67
+chr2	59721277	59721585	MACS_peak_4718	52.74
+chr4	148521433	148521717	MACS_peak_6146	50.43
+chr6	29651055	29651524	MACS_peak_6813	99.40
+chr2	25283862	25284454	MACS_peak_4553	118.30
+chr1	180335685	180336069	MACS_peak_548	70.43
+chr15	9000420	9001374	MACS_peak_2924	50.16
+chr17	76783407	76783741	MACS_peak_3945	73.33
+chr10	79377042	79377510	MACS_peak_798	105.25
+chr4	137813129	137813637	MACS_peak_6069	79.35
+chr19	23347935	23348633	MACS_peak_4335	168.98
+chr2	77014459	77014721	MACS_peak_4764	57.79
+chr17	27725137	27725648	MACS_peak_3703	81.61
+chr3	84271282	84271895	MACS_peak_5266	107.11
+chr4	149036130	149036714	MACS_peak_6156	127.11
+chr17	36157226	36157966	MACS_peak_3800	58.06
+chr9	113925463	113925818	MACS_peak_8532	139.16
+chr18	62455027	62455412	MACS_peak_4131	69.19
+chr2	143717397	143717821	MACS_peak_4930	57.72
+chr14	70058939	70059262	MACS_peak_2698	78.56
+chr9	8004492	8005091	MACS_peak_8167	146.39
+chr2	22750741	22751369	MACS_peak_4526	107.68
+chr11	113663893	113664272	MACS_peak_1623	118.67
+chr11	60643876	60644376	MACS_peak_1224	75.91
+chr13	55463887	55464601	MACS_peak_2197	89.38
+chr3	138158153	138158835	MACS_peak_5576	101.13
+chr9	61779725	61780181	MACS_peak_8335	52.74
+chr5	141092685	141093127	MACS_peak_6724	60.64
+chr4	151560621	151560894	MACS_peak_6192	80.71
+chr12	71087816	71088552	MACS_peak_1856	105.21
+chr3	136623971	136624307	MACS_peak_5565	54.21
+chr18	64675715	64676128	MACS_peak_4137	67.15
+chr5	93521864	93522250	MACS_peak_6451	50.34
+chr14	27666233	27666572	MACS_peak_2457	95.10
+chr17	65649466	65649790	MACS_peak_3914	74.21
+chr3	96961630	96962284	MACS_peak_5365	62.03
+chr19	46681813	46682242	MACS_peak_4433	64.27
+chr5	33677654	33678040	MACS_peak_6302	133.38
+chr1	155123197	155123551	MACS_peak_454	52.75
+chr11	104222718	104223628	MACS_peak_1551	135.09
+chr12	40834638	40835084	MACS_peak_1801	104.85
+chr5	140797328	140797751	MACS_peak_6714	136.27
+chr8	124636207	124636603	MACS_peak_8095	55.87
+chr1	33776550	33777146	MACS_peak_29	54.49
+chr2	127277423	127277785	MACS_peak_4871	62.22
+chr16	11144052	11144357	MACS_peak_3337	75.98
+chr2	71759141	71759569	MACS_peak_4740	61.84
+chr5	144654264	144654609	MACS_peak_6750	202.40
+chr6	136416896	136417766	MACS_peak_7155	107.68
+chr19	61160284	61160710	MACS_peak_4445	79.27
+chr5	135513632	135514247	MACS_peak_6652	52.16
+chr10	69559457	69559926	MACS_peak_764	75.43
+chr19	34625289	34625732	MACS_peak_4369	58.58
+chr3	129778582	129778971	MACS_peak_5530	52.74
+chr3	40549079	40549989	MACS_peak_5170	139.15
+chr12	63655639	63655947	MACS_peak_1841	75.70
+chr12	88027775	88028206	MACS_peak_1939	57.42
+chr4	149930560	149930906	MACS_peak_6169	51.58
+chr7	26175003	26175297	MACS_peak_7251	193.27
+chr3	137631502	137632466	MACS_peak_5570	270.50
+chr7	75095358	75096309	MACS_peak_7396	276.27
+chr13	112597147	112598260	MACS_peak_2361	120.74
+chr8	73397210	73397892	MACS_peak_7870	88.07
+chr10	57870391	57870790	MACS_peak_723	61.56
+chr12	21379875	21380271	MACS_peak_1759	71.11
+chr4	149229209	149229627	MACS_peak_6162	68.37
+chr11	79454167	79454630	MACS_peak_1371	103.29
+chr2	118577801	118578303	MACS_peak_4831	83.83
+chr12	90031052	90031356	MACS_peak_1953	67.25
+chr3	89221936	89222334	MACS_peak_5302	55.52
+chr11	49015967	49017374	MACS_peak_1149	140.92
+chr5	101854756	101855187	MACS_peak_6476	57.25
+chr14	55045118	55046069	MACS_peak_2572	278.29
+chr8	122360636	122360974	MACS_peak_8068	72.98
+chr6	29559590	29559996	MACS_peak_6810	66.51
+chr8	37675573	37676057	MACS_peak_7806	52.74
+chr7	135604640	135605584	MACS_peak_7660	140.03
+chr7	75215546	75215889	MACS_peak_7400	73.17
+chr11	6387328	6387780	MACS_peak_1054	66.34
+chr6	97171581	97172040	MACS_peak_7033	76.80
+chr2	71652110	71652536	MACS_peak_4738	52.74
+chr14	70205001	70205574	MACS_peak_2702	90.17
+chr7	4636478	4636845	MACS_peak_7189	84.01
+chr1	163697037	163697580	MACS_peak_480	152.14
+chr14	69905127	69905516	MACS_peak_2694	60.69
+chr4	105905243	105905631	MACS_peak_5853	53.13
+chr19	43763805	43764296	MACS_peak_4405	84.01
+chr15	98863988	98864259	MACS_peak_3288	109.33
+chr8	28268378	28268863	MACS_peak_7783	143.30
+chr5	50210130	50210512	MACS_peak_6329	203.50
+chr1	49424163	49424526	MACS_peak_75	70.90
+chr11	114416815	114417130	MACS_peak_1628	75.04
+chr2	29967973	29968359	MACS_peak_4603	84.01
+chr11	87275081	87275514	MACS_peak_1443	92.18
+chr9	72510503	72510911	MACS_peak_8403	63.21
+chr18	32996570	32997045	MACS_peak_4049	106.79
+chr7	108812030	108812396	MACS_peak_7522	58.27
+chr11	61377499	61378145	MACS_peak_1228	59.79
+chr5	141051472	141051938	MACS_peak_6718	69.03
+chr13	36416595	36416984	MACS_peak_2127	55.95
+chr9	14446069	14446592	MACS_peak_8182	98.84
+chr10	117850777	117851031	MACS_peak_971	57.79
+chr8	126502767	126503316	MACS_peak_8125	69.46
+chr6	66891898	66892295	MACS_peak_6917	59.59
+chr4	122959709	122960251	MACS_peak_5935	121.49
+chr12	60308039	60308451	MACS_peak_1839	80.38
+chr5	137108320	137108562	MACS_peak_6673	91.45
+chr4	129373773	129374378	MACS_peak_5972	131.90
+chr2	45268392	45268789	MACS_peak_4692	56.57
+chr5	141120758	141121124	MACS_peak_6726	71.31
+chr16	30453372	30453956	MACS_peak_3392	106.68
+chrX	71542249	71542578	MACS_peak_8628	64.97
+chr12	72743380	72743811	MACS_peak_1864	61.64
+chrX	108755267	108755697	MACS_peak_8648	65.99
+chr9	45983547	45983831	MACS_peak_8258	57.79
+chr14	63049340	63049683	MACS_peak_2644	61.79
+chr7	105719591	105719912	MACS_peak_7498	88.72
+chr7	65987933	65988294	MACS_peak_7377	74.48
+chr7	26496882	26497392	MACS_peak_7260	73.32
+chr3	157588086	157588412	MACS_peak_5676	86.48
+chr5	66089157	66089851	MACS_peak_6367	87.07
+chr1	63823189	63823575	MACS_peak_148	56.59
+chr19	8872798	8873377	MACS_peak_4283	89.38
+chr2	179759459	179759977	MACS_peak_5073	51.39
+chr6	128611850	128612175	MACS_peak_7127	74.12
+chr6	125049277	125049748	MACS_peak_7109	130.85
+chr14	58645884	58646276	MACS_peak_2621	52.85
+chr7	20080932	20081328	MACS_peak_7239	66.58
+chr2	131917466	131917870	MACS_peak_4911	59.48
+chr5	3152015	3152483	MACS_peak_6238	160.90
+chr2	132512500	132512844	MACS_peak_4916	59.86
+chrX	99352299	99352715	MACS_peak_8645	66.94
+chr18	55059820	55060479	MACS_peak_4108	118.53
+chr3	40456923	40457382	MACS_peak_5169	255.48
+chr11	57331929	57332212	MACS_peak_1192	57.79
+chr9	65389306	65389659	MACS_peak_8364	62.73
+chr6	30252722	30253129	MACS_peak_6815	70.78
+chr9	74844269	74844678	MACS_peak_8409	69.94
+chr3	79787772	79788143	MACS_peak_5255	89.40
+chr5	97259867	97260133	MACS_peak_6460	68.02
+chr7	147392845	147393149	MACS_peak_7705	66.02
+chrX	71516418	71516883	MACS_peak_8627	52.74
+chr4	135841295	135841647	MACS_peak_6056	62.73
+chr17	34781424	34781705	MACS_peak_3772	57.79
+chr6	108654497	108654889	MACS_peak_7052	82.04
+chr1	88337836	88338284	MACS_peak_244	100.57
+chr16	18876401	18877082	MACS_peak_3353	122.18
+chr15	86033062	86033641	MACS_peak_3242	60.29
+chr11	16851380	16851985	MACS_peak_1071	71.31
+chr7	125272857	125273444	MACS_peak_7585	163.78
+chr12	53738815	53739179	MACS_peak_1825	70.07
+chr2	156665349	156666095	MACS_peak_4998	100.62
+chr7	133942356	133942843	MACS_peak_7630	98.04
+chr9	90020990	90021384	MACS_peak_8443	52.74
+chr11	83658247	83658585	MACS_peak_1398	60.37
+chr14	52103248	52103624	MACS_peak_2554	55.37
+chr18	36446981	36447399	MACS_peak_4078	89.45
+chr14	22367170	22367534	MACS_peak_2429	72.40
+chr15	53498017	53498814	MACS_peak_3061	116.56
+chr11	87256810	87257164	MACS_peak_1441	111.94
+chr9	122859679	122860394	MACS_peak_8568	109.13
+chr1	23930853	23931253	MACS_peak_23	249.39
+chr12	70598412	70598748	MACS_peak_1852	89.26
+chr13	51943389	51943842	MACS_peak_2182	104.20
+chr19	29138427	29138708	MACS_peak_4343	57.79
+chr8	81885020	81885368	MACS_peak_7895	87.82
+chr11	106277303	106277812	MACS_peak_1574	61.29
+chr14	119583365	119583707	MACS_peak_2862	52.74
+chr6	32801035	32801303	MACS_peak_6834	51.52
+chr10	94394483	94395062	MACS_peak_900	88.16
+chr3	37565697	37565925	MACS_peak_5162	90.55
+chr3	145588349	145588654	MACS_peak_5611	80.31
+chr19	23061529	23061970	MACS_peak_4334	65.26
+chr17	26989228	26989502	MACS_peak_3680	76.86
+chr1	95970905	95971720	MACS_peak_298	160.55
+chr4	108520352	108520992	MACS_peak_5880	118.67
+chr3	26391575	26392055	MACS_peak_5126	88.33
+chr3	8919741	8920292	MACS_peak_5104	77.70
+chr1	29104970	29105334	MACS_peak_25	70.82
+chr16	58637925	58638495	MACS_peak_3475	107.78
+chr15	57966348	57966985	MACS_peak_3075	149.78
+chr13	115022343	115022626	MACS_peak_2368	64.17
+chr11	67905507	67905890	MACS_peak_1243	200.30
+chr17	29330165	29330593	MACS_peak_3730	142.62
+chr11	119161198	119161769	MACS_peak_1689	135.03
+chr4	140249323	140249820	MACS_peak_6089	56.52
+chr1	35926096	35926623	MACS_peak_38	55.35
+chr1	59412217	59412591	MACS_peak_129	51.22
+chr2	181414705	181415126	MACS_peak_5096	57.92
+chr17	57418275	57418714	MACS_peak_3896	78.38
+chr8	87246451	87247128	MACS_peak_7927	93.62
+chr12	81913169	81913458	MACS_peak_1914	52.00
+chr9	88275002	88275236	MACS_peak_8441	69.53
+chr11	103078799	103079762	MACS_peak_1540	129.73
+chr7	148141747	148142194	MACS_peak_7708	52.74
+chr19	41338432	41338860	MACS_peak_4389	51.90
+chr16	91538765	91539078	MACS_peak_3515	52.29
+chr7	132761686	132762056	MACS_peak_7609	66.03
+chr5	138070239	138070549	MACS_peak_6688	62.84
+chr1	174294816	174295355	MACS_peak_528	124.08
+chr19	41912152	41912595	MACS_peak_4394	51.03
+chr3	96217894	96218423	MACS_peak_5355	67.16
+chr8	11393666	11393996	MACS_peak_7740	77.69
+chr15	37172600	37172975	MACS_peak_3007	91.99
+chr1	173611130	173611397	MACS_peak_522	76.86
+chr1	133022808	133023128	MACS_peak_372	72.77
+chr1	88454389	88454942	MACS_peak_252	84.11
+chr5	34856205	34856831	MACS_peak_6311	57.06
+chr7	71082000	71082779	MACS_peak_7382	80.20
+chr14	63736378	63736637	MACS_peak_2648	59.25
+chr19	32843677	32843920	MACS_peak_4364	63.61
+chr3	138702613	138702924	MACS_peak_5578	62.02
+chr17	86566107	86566474	MACS_peak_3976	84.24
+chr8	96910090	96910444	MACS_peak_7966	52.74
+chr13	112430419	112430951	MACS_peak_2359	81.28
+chr10	42013834	42014255	MACS_peak_700	66.60
+chr11	31517779	31518060	MACS_peak_1118	68.02
+chr18	5101351	5101714	MACS_peak_4000	52.05
+chr9	62724326	62725109	MACS_peak_8338	201.21
+chr9	99083674	99084236	MACS_peak_8465	132.54
+chr4	134827884	134829500	MACS_peak_6036	231.64
+chr17	13498739	13499070	MACS_peak_3624	73.59
+chr2	103006169	103006579	MACS_peak_4802	67.36
+chr15	6925244	6925735	MACS_peak_2914	54.92
+chr7	53078238	53078607	MACS_peak_7352	51.59
+chr2	90910384	90910774	MACS_peak_4782	68.81
+chr14	60870155	60870663	MACS_peak_2627	98.84
+chr2	118798450	118798941	MACS_peak_4834	74.57
+chr11	100870661	100871158	MACS_peak_1523	172.41
+chr11	87562630	87563498	MACS_peak_1450	403.76
+chr1	88154721	88155315	MACS_peak_239	86.42
+chr11	83112056	83112724	MACS_peak_1391	71.38
+chr12	101425557	101426166	MACS_peak_1973	147.38
+chr6	85401368	85402272	MACS_peak_6974	228.57
+chr11	78966191	78966722	MACS_peak_1368	117.55
+chr3	129236434	129236906	MACS_peak_5523	68.23
+chr9	109777897	109778345	MACS_peak_8516	77.56
+chr3	88426615	88427427	MACS_peak_5292	205.76
+chr1	46004702	46005146	MACS_peak_69	84.01
+chr5	76126811	76127048	MACS_peak_6401	66.98
+chr10	59405079	59405490	MACS_peak_730	58.60
+chr1	9690569	9690998	MACS_peak_3	64.27
+chr11	88281205	88281634	MACS_peak_1478	51.83
+chr10	21199165	21199653	MACS_peak_652	194.61
+chr1	173433353	173434146	MACS_peak_518	161.22
+chr12	35731430	35731910	MACS_peak_1792	101.79
+chr15	38446130	38446559	MACS_peak_3017	57.57
+chr4	144679039	144679338	MACS_peak_6130	76.57
+chr10	92865497	92865758	MACS_peak_887	57.79
+chr14	121027316	121027735	MACS_peak_2865	55.30
+chr3	96530843	96531751	MACS_peak_5362	201.73
+chr16	91406386	91406908	MACS_peak_3506	89.38
+chr5	67336216	67336546	MACS_peak_6372	54.72
+chr3	89746156	89746412	MACS_peak_5305	60.79
+chr14	106991035	106991399	MACS_peak_2826	58.26
+chr1	36186077	36186421	MACS_peak_41	72.46
+chr14	66211596	66212092	MACS_peak_2670	58.08
+chr2	127911067	127911519	MACS_peak_4875	63.04
+chr8	73335210	73335509	MACS_peak_7867	62.14
+chr17	24291509	24291795	MACS_peak_3646	66.03
+chr16	92938013	92938490	MACS_peak_3549	62.73
+chr11	3279575	3280176	MACS_peak_1024	89.38
+chr6	32447109	32447460	MACS_peak_6833	67.55
+chr1	133724229	133724871	MACS_peak_379	70.44
+chr3	138152249	138152833	MACS_peak_5575	98.84
+chr1	38420121	38420414	MACS_peak_60	72.51
+chr14	55224814	55225200	MACS_peak_2579	50.34
+chr4	140624561	140624920	MACS_peak_6101	83.78
+chr2	106328336	106328622	MACS_peak_4811	57.79
+chr11	114335454	114335791	MACS_peak_1626	77.00
+chr1	133850783	133851479	MACS_peak_381	101.13
+chr15	101084784	101085109	MACS_peak_3307	66.49
+chr1	121422851	121423230	MACS_peak_328	58.94
+chr5	50093335	50093768	MACS_peak_6328	78.73
+chr17	44569507	44569864	MACS_peak_3811	119.82
+chr9	40965392	40966162	MACS_peak_8238	135.71
+chr18	57409148	57409560	MACS_peak_4116	95.69
+chr11	106227571	106228161	MACS_peak_1573	68.64
+chr12	106264328	106264856	MACS_peak_1994	115.56
+chr11	51694649	51695006	MACS_peak_1167	50.61
+chr14	73304152	73304540	MACS_peak_2723	56.45
+chr13	38249483	38249806	MACS_peak_2137	53.73
+chr17	23939899	23940349	MACS_peak_3642	104.48
+chr8	13353101	13353525	MACS_peak_7749	169.06
+chr6	134203272	134203641	MACS_peak_7145	68.62
+chr13	3869743	3870092	MACS_peak_2043	57.73
+chr14	71173919	71174385	MACS_peak_2717	65.91
+chr15	8711544	8712011	MACS_peak_2923	52.74
+chr14	60883642	60884282	MACS_peak_2628	73.32
+chr6	100238263	100238692	MACS_peak_7041	79.31
+chr18	43246353	43246775	MACS_peak_4094	54.45
+chr3	32427840	32428279	MACS_peak_5142	65.39
+chr4	114176976	114177339	MACS_peak_5890	69.10
+chr15	24413374	24413825	MACS_peak_2953	72.47
+chr17	24388206	24388689	MACS_peak_3648	52.74
+chr2	31983332	31984086	MACS_peak_4626	138.76
+chr1	82784012	82784449	MACS_peak_226	85.75
+chr11	115527669	115527986	MACS_peak_1642	74.85
+chr4	133958319	133958921	MACS_peak_6017	111.16
+chr3	33698778	33699287	MACS_peak_5144	72.46
+chr14	122276884	122277969	MACS_peak_2882	101.55
+chr12	87310571	87310976	MACS_peak_1938	95.23
+chr13	58545231	58545547	MACS_peak_2208	75.30
+chr4	151382308	151383335	MACS_peak_6186	211.80
+chr4	107730838	107731131	MACS_peak_5875	76.86
+chr7	127973750	127974191	MACS_peak_7591	56.60
+chr13	51831495	51831924	MACS_peak_2177	53.62
+chr6	113256331	113257237	MACS_peak_7059	493.29
+chr18	75366936	75367315	MACS_peak_4186	69.64
+chr8	83893580	83893897	MACS_peak_7903	64.52
+chr6	82852344	82853038	MACS_peak_6953	269.15
+chr5	123271183	123271619	MACS_peak_6591	123.24
+chr14	47344721	47345104	MACS_peak_2524	52.77
+chr3	152575073	152575476	MACS_peak_5646	52.74
+chr3	145596911	145597202	MACS_peak_5612	83.77
+chr9	63985221	63985609	MACS_peak_8354	62.36
+chr1	58851809	58852216	MACS_peak_124	60.72
+chr4	119140346	119140886	MACS_peak_5928	82.34
+chr17	24486224	24486632	MACS_peak_3649	78.65
+chr6	34614205	34614765	MACS_peak_6840	62.73
+chr17	50210060	50210574	MACS_peak_3861	61.75
+chr18	31945760	31946081	MACS_peak_4043	57.12
+chr17	23680231	23680767	MACS_peak_3638	53.90
+chr15	38129140	38129536	MACS_peak_3012	59.66
+chr8	113782043	113782424	MACS_peak_8040	69.49
+chr18	36388485	36388844	MACS_peak_4075	60.49
+chr14	35176361	35177236	MACS_peak_2498	90.81
+chr15	58986223	58986533	MACS_peak_3088	76.07
+chr15	38230489	38231164	MACS_peak_3015	108.56
+chr2	26207239	26207961	MACS_peak_4567	93.92
+chr17	31700930	31701353	MACS_peak_3750	79.51
+chr14	69764756	69765167	MACS_peak_2692	66.11
+chr1	82630866	82631590	MACS_peak_222	118.15
+chr13	63463080	63463547	MACS_peak_2222	137.87
+chr9	88333602	88334003	MACS_peak_8442	170.03
+chr1	108414102	108414881	MACS_peak_317	194.11
+chr17	71202093	71202486	MACS_peak_3924	71.93
+chr2	4397930	4398339	MACS_peak_4456	96.07
+chr19	6313436	6313784	MACS_peak_4261	67.80
+chr17	47430218	47430545	MACS_peak_3836	57.96
+chr1	88383641	88384502	MACS_peak_245	82.34
+chr15	99307421	99307832	MACS_peak_3292	54.82
+chr10	87525443	87525844	MACS_peak_870	52.21
+chr1	137560338	137560774	MACS_peak_409	84.01
+chr2	98177089	98177336	MACS_peak_4795	66.43
+chr6	146904425	146904852	MACS_peak_7174	57.52
+chr3	88489494	88489890	MACS_peak_5293	71.68
+chrX	160870684	160870964	MACS_peak_8672	76.86
+chr5	96978763	96979127	MACS_peak_6459	66.50
+chr11	117515601	117516136	MACS_peak_1677	87.38
+chr8	129497368	129498236	MACS_peak_8150	121.81
+chr9	44134590	44135169	MACS_peak_8247	90.62
+chr3	157699307	157699805	MACS_peak_5678	169.27
+chr1	184472944	184473657	MACS_peak_582	98.66
+chr2	165780325	165780779	MACS_peak_5042	95.23
+chr12	52792839	52793385	MACS_peak_1814	54.49
+chr13	23855588	23856068	MACS_peak_2092	126.79
+chr18	83456033	83456433	MACS_peak_4211	68.08
+chr10	14425071	14425602	MACS_peak_620	94.14
+chr13	41582171	41582615	MACS_peak_2143	65.06
+chr10	94786191	94786440	MACS_peak_905	106.55
+chr8	109816960	109817536	MACS_peak_8026	135.33
+chr12	81878967	81879611	MACS_peak_1912	119.33
+chr7	26059620	26059929	MACS_peak_7250	118.92
+chr4	62176380	62176995	MACS_peak_5799	54.70
+chr18	53436462	53436882	MACS_peak_4104	79.74
+chr11	51502212	51502759	MACS_peak_1164	56.31
+chr19	37281848	37282416	MACS_peak_4379	68.11
+chr16	92710525	92711133	MACS_peak_3535	77.20
+chr3	120874131	120875192	MACS_peak_5483	293.69
+chr13	17696479	17697123	MACS_peak_2068	170.31
+chr19	46077680	46078113	MACS_peak_4423	65.79
+chr5	143682333	143682913	MACS_peak_6736	222.86
+chr1	154568166	154568685	MACS_peak_448	95.23
+chr9	81108308	81108891	MACS_peak_8430	145.29
+chr12	44270399	44270849	MACS_peak_1806	72.54
+chr5	106128761	106129096	MACS_peak_6489	54.49
+chr4	120903868	120904161	MACS_peak_5932	76.86
+chr3	68460731	68461147	MACS_peak_5234	137.12
+chr3	58329271	58329978	MACS_peak_5204	131.98
+chr4	151462216	151462538	MACS_peak_6188	74.40
+chr17	34059082	34059521	MACS_peak_3763	62.02
+chr15	76554091	76554392	MACS_peak_3159	72.02
+chr2	117013160	117013597	MACS_peak_4828	61.25
+chr6	8719505	8720279	MACS_peak_6785	237.86
+chr6	102485635	102486092	MACS_peak_7043	67.00
+chr18	65959921	65960294	MACS_peak_4153	65.80
+chr11	3308916	3309259	MACS_peak_1025	175.63
+chr11	44333429	44333838	MACS_peak_1139	94.32
+chr1	9933866	9934292	MACS_peak_4	151.02
+chr2	49701652	49702023	MACS_peak_4700	86.63
+chr5	147072457	147073005	MACS_peak_6754	70.79
+chr14	21993252	21993662	MACS_peak_2423	117.33
+chr14	76186318	76187158	MACS_peak_2750	90.56
+chr3	139131096	139131484	MACS_peak_5580	50.20
+chr7	26466797	26467478	MACS_peak_7255	106.79
+chr1	123464347	123464745	MACS_peak_335	70.93
+chr7	132615669	132615970	MACS_peak_7605	72.43
+chr10	86169149	86169550	MACS_peak_867	103.08
+chr13	49863349	49863865	MACS_peak_2174	74.46
+chr7	86508733	86509023	MACS_peak_7431	62.85
+chr11	77378344	77378754	MACS_peak_1326	82.92
+chr6	86634390	86634683	MACS_peak_6980	103.74
+chr10	116871908	116872390	MACS_peak_961	57.59
+chr14	41750272	41750682	MACS_peak_2513	95.75
+chr4	151469965	151470437	MACS_peak_6189	78.61
+chr10	60875630	60876040	MACS_peak_740	67.36
+chr6	91572361	91572723	MACS_peak_7021	70.91
+chr6	128795347	128795755	MACS_peak_7131	94.41
+chr18	58701674	58702163	MACS_peak_4120	87.63
+chr5	110698548	110699046	MACS_peak_6511	143.30
+chrX	93325546	93325822	MACS_peak_8637	91.45
+chr7	134432899	134433190	MACS_peak_7644	106.55
+chr12	29320679	29321102	MACS_peak_1769	54.01
+chr10	20124963	20125579	MACS_peak_639	217.11
+chr3	106416991	106417459	MACS_peak_5432	62.73
+chr10	33739040	33739699	MACS_peak_678	218.19
+chr8	74878095	74878556	MACS_peak_7883	94.87
+chr11	83382803	83383271	MACS_peak_1395	68.88
+chr3	58862746	58863399	MACS_peak_5208	157.00
+chr15	77685670	77686204	MACS_peak_3162	62.73
+chr14	86615475	86615825	MACS_peak_2782	87.58
+chr6	124867137	124867557	MACS_peak_7105	62.38
+chr13	55514580	55515153	MACS_peak_2199	117.94
+chr5	102254699	102255015	MACS_peak_6477	60.56
+chr2	25923796	25924257	MACS_peak_4561	56.91
+chr3	152061055	152061395	MACS_peak_5639	51.22
+chr6	86334798	86335187	MACS_peak_6976	60.17
+chr17	53706687	53707067	MACS_peak_3872	69.57
+chr7	18595991	18596427	MACS_peak_7225	65.59
+chr2	166445552	166446686	MACS_peak_5044	237.97
+chrX	12567874	12568307	MACS_peak_8607	92.18
+chr9	57187655	57188087	MACS_peak_8309	150.82
+chr3	96553930	96554350	MACS_peak_5363	58.24
+chr11	49794755	49795218	MACS_peak_1156	97.10
+chr12	86453801	86454179	MACS_peak_1931	69.72
+chr11	113544835	113545435	MACS_peak_1622	189.84
+chr14	14820991	14821374	MACS_peak_2395	50.82
+chr11	115936845	115937272	MACS_peak_1653	53.75
+chr9	66358765	66359165	MACS_peak_8375	66.29
+chr3	14944703	14945194	MACS_peak_5112	88.33
+chr4	147514611	147514949	MACS_peak_6139	51.38
+chr2	154429029	154429495	MACS_peak_4972	62.73
+chr17	21082204	21082596	MACS_peak_3632	82.04
+chr17	14304400	14304838	MACS_peak_3625	65.46
+chr5	65288018	65288287	MACS_peak_6358	72.51
+chr2	26518695	26519103	MACS_peak_4576	63.21
+chr1	9272967	9273590	MACS_peak_2	182.17
+chr9	79658076	79658784	MACS_peak_8427	64.31
+chr12	100872379	100872633	MACS_peak_1969	57.15
+chr2	128684574	128684999	MACS_peak_4881	74.42
+chr2	154558527	154559018	MACS_peak_4975	69.69
+chr19	61171106	61171535	MACS_peak_4446	66.05
+chr6	88431783	88432112	MACS_peak_7003	73.77
+chr3	136303454	136303877	MACS_peak_5562	56.77
+chr12	71328424	71329097	MACS_peak_1859	114.45
+chr6	72059443	72059831	MACS_peak_6930	67.17
+chr7	26514419	26514743	MACS_peak_7262	83.43
+chr12	88174557	88175940	MACS_peak_1945	236.16
+chr17	29530544	29530849	MACS_peak_3735	56.94
+chr10	80895647	80896054	MACS_peak_845	153.53
+chr7	82925765	82926126	MACS_peak_7421	129.84
+chr13	105607134	105607556	MACS_peak_2347	50.79
+chr10	19311634	19312299	MACS_peak_631	73.24
+chr11	113667257	113667748	MACS_peak_1624	426.44
+chr17	29301427	29301965	MACS_peak_3729	91.69
+chr3	28680019	28680490	MACS_peak_5135	89.03
+chr12	70548827	70549327	MACS_peak_1849	143.57
+chr9	57758663	57758985	MACS_peak_8322	57.05
+chr12	55963541	55963840	MACS_peak_1831	67.73
+chr10	80814336	80815092	MACS_peak_838	73.53
+chr9	106106639	106107165	MACS_peak_8487	92.77
+chr18	31948500	31948917	MACS_peak_4044	115.28
+chr3	97817136	97817796	MACS_peak_5372	108.56
+chr5	115608262	115608636	MACS_peak_6540	61.95
+chr1	92677004	92677359	MACS_peak_270	56.79
+chr4	154536407	154536875	MACS_peak_6204	179.88
+chr11	78866726	78867449	MACS_peak_1362	160.60
+chr1	58502152	58502677	MACS_peak_120	107.68
+chr1	78654241	78654789	MACS_peak_210	256.08
+chr17	91266249	91266654	MACS_peak_3987	67.72
+chr10	80392804	80393085	MACS_peak_829	60.79
+chr9	123168920	123169217	MACS_peak_8575	89.95
+chr18	64648305	64648906	MACS_peak_4135	50.43
+chr16	45025492	45026112	MACS_peak_3450	84.01
+chr11	120459679	120460020	MACS_peak_1701	71.31
+chr13	19606884	19607473	MACS_peak_2072	72.34
+chr7	134324277	134324565	MACS_peak_7637	91.45
+chr12	32908423	32908906	MACS_peak_1781	89.00
+chr15	24842100	24842543	MACS_peak_2955	75.93
+chr18	6489913	6490350	MACS_peak_4007	97.14
+chr14	122247568	122247901	MACS_peak_2881	54.94
+chr10	98856446	98856735	MACS_peak_921	64.17
+chr1	182741146	182741385	MACS_peak_566	91.45
+chr4	45045503	45045718	MACS_peak_5770	81.78
+chr1	95531960	95532820	MACS_peak_290	140.79
+chr2	152643374	152643779	MACS_peak_4954	153.80
+chr10	79317823	79318310	MACS_peak_795	114.99
+chr5	106146989	106147692	MACS_peak_6490	267.90
+chr4	34634278	34634904	MACS_peak_5728	116.61
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/examples/nib/test_batch_fasta.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,9 @@
+from chipsequtil import get_org_settings, BEDFile
+from chipsequtil.nib import NibDB
+from pprint import pprint
+
+genome_dir = get_org_settings('mm9')['genome_dir']
+db = NibDB(nib_dirs=[genome_dir])
+fasta_headers,seqs = db.get_fasta_from_bed('shuffled_peaks.bed')
+
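+# show the first 10 extracted sequences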
+pprint(seqs[:10])
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/examples/nib/test_nib_db.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,47 @@
+from chipsequtil import get_org_settings, BEDFile
+from chipsequtil.nib import NibDB
+from pprint import pprint
+
+# see `org_settings.py -h` for more info on the get_org_settings(<organism>) function
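+# (it returns a dict of that organism's settings from org_settings.cfg,
+# e.g. get_org_settings('mm9')['genome_dir'] below)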
+genome_dir = get_org_settings('mm9')['genome_dir']
+
+# NibDB is an interface to a collection of nib files, typically corresponding
+# to chromosomes of a genome
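+# (.nib is UCSC's packed nucleotide format, one sequence per file; here the
+# files are assumed to be named per chromosome, e.g. chr1.nib)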
+
+# example with only one nib file
+print 'NibDB with a single nib file'
+db = NibDB(nib_fns=[genome_dir+'/chr1.nib'])
+
+print 'NibDB info:'
+pprint(dict(db.db_info))
+
+# get a fasta record for some sequence
+print 'Example fasta record: chr1:1e8-1e8+100'
+print db.get_fasta('chr1',1e8,1e8+100)
+
+# get just the sequence
+print 'Same example, only sequence:'
+print db.get_seq('chr1',1e8,1e8+100)
+print
+
+
+# example with a directory of nib files
+print 'NibDB with a directory of nib files'
+db = NibDB(nib_dirs=[genome_dir])
+
+# get a fasta record for some sequence
+print 'Example fasta record: chr1:1e8-1e8+100'
+print db.get_fasta('chr1',1e8,1e8+100)
+
+print 'Example fasta record: chr2:1e8-1e8+100'
+print db.get_fasta('chr2',1e8,1e8+100)
+
+print 'Example fasta record: chrX:1e8-1e8+100'
+print db.get_fasta('chrX',1e8,1e8+100)
+
+
+# example of fetching all sequences from a bed file
+fasta_headers,seqs = db.get_fasta_from_bed('shuffled_peaks.bed')
+
+print 'Num. peaks:',len(open('shuffled_peaks.bed').readlines())
+pprint(seqs[:10])
Binary file chipsequtil-master/examples/seq/._test_chipsequtil_seq.py has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/examples/seq/test_chipsequtil_seq.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,19 @@
+from StringIO import StringIO
+from chipsequtil.seq import FASTAFile, FASTQFile
+
+fasta_str = StringIO(">seq1\nACATAGGGAT\n>seq2\nTTATNTAGATA\n")
+fasta_f = FASTAFile(fasta_str)
+print fasta_f.headers
+
+print "[r for r in fasta_f]", [r for r in fasta_f]
+print "fasta_f['seq1']", fasta_f['seq1']
+print "fasta_f.headers", fasta_f.headers
+print "fasta_f.sequences", fasta_f.sequences
+
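+# note: each FASTQ quality string must be the same length as its sequence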
+fastq_str = StringIO("@seq1\nACATAGGGAT\n+seq1\nY^_cccQYJQ\n@seq2\nTTATNTAGATA\n+seq2\nY^_cJcQQJQQ")
+fastq_f = FASTQFile(fastq_str)
+print "[r for r in fastq_f]", [r for r in fastq_f]
+print "fastq_f['seq1']", fastq_f['seq1']
+print "fastq_f.headers", fastq_f.headers
+print "fastq_f.sequences", fastq_f.sequences
+print "fastq_f.quals", fastq_f.quals
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/ez_setup.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,284 @@
+#!python
+"""Bootstrap setuptools installation
+
+If you want to use setuptools in your package's setup.py, just include this
+file in the same directory with it, and add this to the top of your setup.py::
+
+    from ez_setup import use_setuptools
+    use_setuptools()
+
+If you want to require a specific version of setuptools, set a download
+mirror, or use an alternate download directory, you can do so by supplying
+the appropriate options to ``use_setuptools()``.
+
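+For example (illustrative values only; both parameters are defined below)::
+
+    from ez_setup import use_setuptools
+    use_setuptools(version="0.6c11", to_dir="/tmp")
+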
+This file can also be run as a script to install or upgrade setuptools.
+"""
+import sys
+DEFAULT_VERSION = "0.6c11"
+DEFAULT_URL     = "http://pypi.python.org/packages/%s/s/setuptools/" % sys.version[:3]
+
+md5_data = {
+    'setuptools-0.6b1-py2.3.egg': '8822caf901250d848b996b7f25c6e6ca',
+    'setuptools-0.6b1-py2.4.egg': 'b79a8a403e4502fbb85ee3f1941735cb',
+    'setuptools-0.6b2-py2.3.egg': '5657759d8a6d8fc44070a9d07272d99b',
+    'setuptools-0.6b2-py2.4.egg': '4996a8d169d2be661fa32a6e52e4f82a',
+    'setuptools-0.6b3-py2.3.egg': 'bb31c0fc7399a63579975cad9f5a0618',
+    'setuptools-0.6b3-py2.4.egg': '38a8c6b3d6ecd22247f179f7da669fac',
+    'setuptools-0.6b4-py2.3.egg': '62045a24ed4e1ebc77fe039aa4e6f7e5',
+    'setuptools-0.6b4-py2.4.egg': '4cb2a185d228dacffb2d17f103b3b1c4',
+    'setuptools-0.6c1-py2.3.egg': 'b3f2b5539d65cb7f74ad79127f1a908c',
+    'setuptools-0.6c1-py2.4.egg': 'b45adeda0667d2d2ffe14009364f2a4b',
+    'setuptools-0.6c10-py2.3.egg': 'ce1e2ab5d3a0256456d9fc13800a7090',
+    'setuptools-0.6c10-py2.4.egg': '57d6d9d6e9b80772c59a53a8433a5dd4',
+    'setuptools-0.6c10-py2.5.egg': 'de46ac8b1c97c895572e5e8596aeb8c7',
+    'setuptools-0.6c10-py2.6.egg': '58ea40aef06da02ce641495523a0b7f5',
+    'setuptools-0.6c11-py2.3.egg': '2baeac6e13d414a9d28e7ba5b5a596de',
+    'setuptools-0.6c11-py2.4.egg': 'bd639f9b0eac4c42497034dec2ec0c2b',
+    'setuptools-0.6c11-py2.5.egg': '64c94f3bf7a72a13ec83e0b24f2749b2',
+    'setuptools-0.6c11-py2.6.egg': 'bfa92100bd772d5a213eedd356d64086',
+    'setuptools-0.6c2-py2.3.egg': 'f0064bf6aa2b7d0f3ba0b43f20817c27',
+    'setuptools-0.6c2-py2.4.egg': '616192eec35f47e8ea16cd6a122b7277',
+    'setuptools-0.6c3-py2.3.egg': 'f181fa125dfe85a259c9cd6f1d7b78fa',
+    'setuptools-0.6c3-py2.4.egg': 'e0ed74682c998bfb73bf803a50e7b71e',
+    'setuptools-0.6c3-py2.5.egg': 'abef16fdd61955514841c7c6bd98965e',
+    'setuptools-0.6c4-py2.3.egg': 'b0b9131acab32022bfac7f44c5d7971f',
+    'setuptools-0.6c4-py2.4.egg': '2a1f9656d4fbf3c97bf946c0a124e6e2',
+    'setuptools-0.6c4-py2.5.egg': '8f5a052e32cdb9c72bcf4b5526f28afc',
+    'setuptools-0.6c5-py2.3.egg': 'ee9fd80965da04f2f3e6b3576e9d8167',
+    'setuptools-0.6c5-py2.4.egg': 'afe2adf1c01701ee841761f5bcd8aa64',
+    'setuptools-0.6c5-py2.5.egg': 'a8d3f61494ccaa8714dfed37bccd3d5d',
+    'setuptools-0.6c6-py2.3.egg': '35686b78116a668847237b69d549ec20',
+    'setuptools-0.6c6-py2.4.egg': '3c56af57be3225019260a644430065ab',
+    'setuptools-0.6c6-py2.5.egg': 'b2f8a7520709a5b34f80946de5f02f53',
+    'setuptools-0.6c7-py2.3.egg': '209fdf9adc3a615e5115b725658e13e2',
+    'setuptools-0.6c7-py2.4.egg': '5a8f954807d46a0fb67cf1f26c55a82e',
+    'setuptools-0.6c7-py2.5.egg': '45d2ad28f9750e7434111fde831e8372',
+    'setuptools-0.6c8-py2.3.egg': '50759d29b349db8cfd807ba8303f1902',
+    'setuptools-0.6c8-py2.4.egg': 'cba38d74f7d483c06e9daa6070cce6de',
+    'setuptools-0.6c8-py2.5.egg': '1721747ee329dc150590a58b3e1ac95b',
+    'setuptools-0.6c9-py2.3.egg': 'a83c4020414807b496e4cfbe08507c03',
+    'setuptools-0.6c9-py2.4.egg': '260a2be2e5388d66bdaee06abec6342a',
+    'setuptools-0.6c9-py2.5.egg': 'fe67c3e5a17b12c0e7c541b7ea43a8e6',
+    'setuptools-0.6c9-py2.6.egg': 'ca37b1ff16fa2ede6e19383e7b59245a',
+}
+
+import sys, os
+try: from hashlib import md5
+except ImportError: from md5 import md5
+
+def _validate_md5(egg_name, data):
+    if egg_name in md5_data:
+        digest = md5(data).hexdigest()
+        if digest != md5_data[egg_name]:
+            print >>sys.stderr, (
+                "md5 validation of %s failed!  (Possible download problem?)"
+                % egg_name
+            )
+            sys.exit(2)
+    return data
+
+def use_setuptools(
+    version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir,
+    download_delay=15
+):
+    """Automatically find/download setuptools and make it available on sys.path
+
+    `version` should be a valid setuptools version number that is available
+    as an egg for download under the `download_base` URL (which should end with
+    a '/').  `to_dir` is the directory where setuptools will be downloaded, if
+    it is not already available.  If `download_delay` is specified, it should
+    be the number of seconds that will be paused before initiating a download,
+    should one be required.  If an older version of setuptools is installed,
+    this routine will print a message to ``sys.stderr`` and raise SystemExit in
+    an attempt to abort the calling script.
+    """
+    was_imported = 'pkg_resources' in sys.modules or 'setuptools' in sys.modules
+    def do_download():
+        egg = download_setuptools(version, download_base, to_dir, download_delay)
+        sys.path.insert(0, egg)
+        import setuptools; setuptools.bootstrap_install_from = egg
+    try:
+        import pkg_resources
+    except ImportError:
+        return do_download()       
+    try:
+        pkg_resources.require("setuptools>="+version); return
+    except pkg_resources.VersionConflict, e:
+        if was_imported:
+            print >>sys.stderr, (
+            "The required version of setuptools (>=%s) is not available, and\n"
+            "can't be installed while this script is running. Please install\n"
+            " a more recent version first, using 'easy_install -U setuptools'."
+            "\n\n(Currently using %r)"
+            ) % (version, e.args[0])
+            sys.exit(2)
+        else:
+            del pkg_resources, sys.modules['pkg_resources']    # reload ok
+            return do_download()
+    except pkg_resources.DistributionNotFound:
+        return do_download()
+
+def download_setuptools(
+    version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir,
+    delay = 15
+):
+    """Download setuptools from a specified location and return its filename
+
+    `version` should be a valid setuptools version number that is available
+    as an egg for download under the `download_base` URL (which should end
+    with a '/'). `to_dir` is the directory where the egg will be downloaded.
+    `delay` is the number of seconds to pause before an actual download attempt.
+    """
+    import urllib2, shutil
+    egg_name = "setuptools-%s-py%s.egg" % (version,sys.version[:3])
+    url = download_base + egg_name
+    saveto = os.path.join(to_dir, egg_name)
+    src = dst = None
+    if not os.path.exists(saveto):  # Avoid repeated downloads
+        try:
+            from distutils import log
+            if delay:
+                log.warn("""
+---------------------------------------------------------------------------
+This script requires setuptools version %s to run (even to display
+help).  I will attempt to download it for you (from
+%s), but
+you may need to enable firewall access for this script first.
+I will start the download in %d seconds.
+
+(Note: if this machine does not have network access, please obtain the file
+
+   %s
+
+and place it in this directory before rerunning this script.)
+---------------------------------------------------------------------------""",
+                    version, download_base, delay, url
+                ); from time import sleep; sleep(delay)
+            log.warn("Downloading %s", url)
+            src = urllib2.urlopen(url)
+            # Read/write all in one block, so we don't create a corrupt file
+            # if the download is interrupted.
+            data = _validate_md5(egg_name, src.read())
+            dst = open(saveto,"wb"); dst.write(data)
+        finally:
+            if src: src.close()
+            if dst: dst.close()
+    return os.path.realpath(saveto)
+
+
+
+def main(argv, version=DEFAULT_VERSION):
+    """Install or upgrade setuptools and EasyInstall"""
+    try:
+        import setuptools
+    except ImportError:
+        egg = None
+        try:
+            egg = download_setuptools(version, delay=0)
+            sys.path.insert(0,egg)
+            from setuptools.command.easy_install import main
+            return main(list(argv)+[egg])   # we're done here
+        finally:
+            if egg and os.path.exists(egg):
+                os.unlink(egg)
+    else:
+        if setuptools.__version__ == '0.0.1':
+            print >>sys.stderr, (
+            "You have an obsolete version of setuptools installed.  Please\n"
+            "remove it from your system entirely before rerunning this script."
+            )
+            sys.exit(2)
+
+    req = "setuptools>="+version
+    import pkg_resources
+    try:
+        pkg_resources.require(req)
+    except pkg_resources.VersionConflict:
+        try:
+            from setuptools.command.easy_install import main
+        except ImportError:
+            from easy_install import main
+        main(list(argv)+[download_setuptools(delay=0)])
+        sys.exit(0) # try to force an exit
+    else:
+        if argv:
+            from setuptools.command.easy_install import main
+            main(argv)
+        else:
+            print "Setuptools version",version,"or greater has been installed."
+            print '(Run "ez_setup.py -U setuptools" to reinstall or upgrade.)'
+
+def update_md5(filenames):
+    """Update our built-in md5 registry"""
+
+    import re
+
+    for name in filenames:
+        base = os.path.basename(name)
+        f = open(name,'rb')
+        md5_data[base] = md5(f.read()).hexdigest()
+        f.close()
+
+    data = ["    %r: %r,\n" % it for it in md5_data.items()]
+    data.sort()
+    repl = "".join(data)
+
+    import inspect
+    srcfile = inspect.getsourcefile(sys.modules[__name__])
+    f = open(srcfile, 'rb'); src = f.read(); f.close()
+
+    match = re.search("\nmd5_data = {\n([^}]+)}", src)
+    if not match:
+        print >>sys.stderr, "Internal error!"
+        sys.exit(2)
+
+    src = src[:match.start(1)] + repl + src[match.end(1):]
+    f = open(srcfile,'w')
+    f.write(src)
+    f.close()
+
+
+if __name__=='__main__':
+    if len(sys.argv)>2 and sys.argv[1]=='--md5update':
+        update_md5(sys.argv[2:])
+    else:
+        main(sys.argv[1:])
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/install.sh	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+# this script installs chipsequtil into /usr/local on the cluster nodes.
+# since the cluster nodes do not have root write access to the network
+# volumes from nodes other than node 9, sudo ... doesn't work with
+# setuptools, because setuptools writes egg_info to the source directory and I
+# can't figure out how to get it to write to a local directory.
+#
+# this script copies the entire chipsequtil source tree to /tmp on the
+# local machine, runs sudo ./setup.py install --prefix=/usr/local, and,
+# on success, deletes the temporary source directory
+#
+# it _must_ be run from the source directory
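+# usage: cd <chipsequtil source dir> && ./install.sh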
+
+TMPDIR="/tmp/chipsequtil_tmp_$(date +%F)"
+if [ ! -d $TMPDIR ]; then
+    echo "temporary source dir $TMPDIR does not exist, creating"
+    mkdir $TMPDIR
+fi
+
+cd ../
+echo "copying source tree to $TMPDIR"
+cp -vr -t $TMPDIR chipsequtil/{setup.*,ez_setup.py,src,scripts,setuptools*}
+cd $TMPDIR
+echo "cd'ed to $PWD, installing"
+sudo ./setup.py install --prefix=/usr/local
+if [ $? -eq 0 ]; then
+        echo "install successful, removing $TMPDIR"
+        cd
+        sudo rm -r $TMPDIR
+fi
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/org_settings.cfg	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,120 @@
+# This file is used by org_settings.py to return sets of paths/settings like
+# genomic sequence files, genome sizes, etc.  It is formatted according to
+# Python's ConfigParser.ConfigParser specification:
+#
+# http://docs.python.org/library/configparser.html
+#
+# Before installation, add any system-specific settings to the categories below,
+# where categories correspond to organism/genome names, creating new category
+# headings where desired.
+#
+# User-specific organisms and settings may be specified in:
+#
+#   os.path.expanduser('~/.org_settings.cfg')
+#
+# with the same format.  Settings in user configuration files override system-wide
+# settings.
+#
+# A minimal organism configuration requires at least genome_dir and genome_size;
+# other settings may be required by different tools (e.g. theme_* for THEME.py).
+#
+# Field values must not contain spaces if they are to be exported to the command
+# line (i.e. with org_settings.py).
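+#
+# A minimal (hypothetical) organism section might look like:
+#
+#   [myorg]
+#   genome_dir=/path/to/myorg/nib
+#   genome_size=1000000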
+
+[hg18]
+description=UCSC hg18 (March '06 build) with full TRANSFAC hypothesis set
+genome=hg18
+genome_dir=/nfs/genomes/human_gp_mar_06
+genome_size=2700000000
+ucsc_chrom_sizes=%(genome_dir)s/%(genome)s.chrom.sizes
+annotation_path=%(genome_dir)s/anno/refFlat-hg18-2010-08-17.txt
+refgene_anno_path=%(genome_dir)s/anno/refFlat-hg18-2010-08-17.txt
+known_gene_anno_path=%(genome_dir)s/anno/knownGene-hg18-2010-08-17.txt
+known_gene_xref_path=%(genome_dir)s/anno/kgXref-hg18-2010-08-17.txt
+theme_hypotheses=/nfs/vendata/cwng/TRANSFAC/2010_transfac_vert_all_filtic9.tamo
+theme_markov=%(genome_dir)s/hg18_promoters_3000_1000.markov
+
+[hg18clust]
+description=UCSC hg18 (March '06 build) with clustered TRANSFAC hypothesis set
+genome=hg18
+genome_dir=/nfs/genomes/human_gp_mar_06
+genome_size=2700000000
+ucsc_chrom_sizes=%(genome_dir)s/%(genome)s.chrom.sizes
+annotation_path=%(genome_dir)s/anno/refFlat-hg18-2010-08-17.txt
+refgene_anno_path=%(genome_dir)s/anno/refFlat-hg18-2010-08-17.txt
+known_gene_anno_path=%(genome_dir)s/anno/knownGene-hg18-2010-08-17.txt
+known_gene_xref_path=%(genome_dir)s/anno/kgXref-hg18-2010-08-17.txt
+theme_hypotheses=/nfs/vendata/cwng/TRANSFAC/2010_transfac_vert_all_filtic9_sym_clus4.tamo
+theme_markov=%(genome_dir)s/hg18_promoters_3000_1000.markov
+weeder_freqfiles_path=%(genome_dir)s/weeder
+
+[hg19]
+description=UCSC hg19 (Feb '09 build) with full TRANSFAC hypothesis set
+genome=hg19
+genome_dir=/nfs/genomes/human_gp_feb_09
+genome_size=2700000000
+ucsc_chrom_sizes=%(genome_dir)s/%(genome)s.chrom.sizes
+annotation_path=%(genome_dir)s/anno/refFlat-%(genome)s-2011-01-04.txt
+refgene_anno_path=%(genome_dir)s/anno/refFlat-%(genome)s-2011-01-04.txt
+known_gene_anno_path=%(genome_dir)s/anno/knownGene-%(genome)s-2011-01-04.txt
+known_gene_xref_path=%(genome_dir)s/anno/kgXref-%(genome)s-2011-01-04.txt
+theme_hypotheses=/nfs/vendata/cwng/TRANSFAC/2010_transfac_vert_all_filtic9.tamo
+theme_markov=%(genome_dir)s/hg19_promoters_3000_1000.markov
+
+[hg19clust]
+description=UCSC hg19 (Feb '09 build) with clustered TRANSFAC hypothesis set
+genome=hg19
+genome_dir=/nfs/genomes/human_gp_feb_09
+genome_size=2700000000
+ucsc_chrom_sizes=%(genome_dir)s/%(genome)s.chrom.sizes
+annotation_path=%(genome_dir)s/anno/refFlat-%(genome)s-2011-01-04.txt
+refgene_anno_path=%(genome_dir)s/anno/refFlat-%(genome)s-2011-01-04.txt
+known_gene_anno_path=%(genome_dir)s/anno/knownGene-%(genome)s-2011-01-04.txt
+known_gene_xref_path=%(genome_dir)s/anno/kgXref-%(genome)s-2011-01-04.txt
+theme_hypotheses=/nfs/vendata/cwng/TRANSFAC/2010_transfac_vert_all_filtic9_sym_clus4.tamo
+theme_markov=%(genome_dir)s/hg19_promoters_3000_1000.markov
+
+[mm9]
+description=UCSC mm9 (July '07 build) with full TRANSFAC hypothesis set
+genome=mm9
+genome_dir=/nfs/genomes/mouse_gp_jul_07
+genome_size=2107000000
+ucsc_chrom_sizes=%(genome_dir)s/%(genome)s.chrom.sizes
+annotation_path=%(genome_dir)s/anno/refFlat-%(genome)s.txt
+refgene_anno_path=%(genome_dir)s/anno/refFlat-%(genome)s.txt
+known_gene_anno_path=%(genome_dir)s/anno/knownGene-%(genome)s.txt
+known_gene_xref_path=%(genome_dir)s/anno/kgXref-%(genome)s.txt
+affy_to_known_path=%(genome_dir)s/anno/knownToMOE430-%(genome)s.txt
+theme_hypotheses=/nfs/vendata/cwng/TRANSFAC/2010_transfac_vert_all_filtic9.tamo
+theme_markov=/nfs/data/cwng/chipseq/hypotheses/Mouse.markov
+
+[mm9clust]
+description=UCSC mm9 (July '07 build) with clustered TRANSFAC hypothesis set
+genome=mm9
+genome_dir=/nfs/genomes/mouse_gp_jul_07
+genome_size=2107000000
+ucsc_chrom_sizes=%(genome_dir)s/%(genome)s.chrom.sizes
+annotation_path=%(genome_dir)s/anno/refFlat-%(genome)s.txt
+refgene_anno_path=%(genome_dir)s/anno/refFlat-%(genome)s.txt
+known_gene_anno_path=%(genome_dir)s/anno/knownGene-%(genome)s.txt
+known_gene_xref_path=%(genome_dir)s/anno/kgXref-%(genome)s.txt
+affy_to_known_path=%(genome_dir)s/anno/knownToMOE430-%(genome)s.txt
+theme_hypotheses=/nfs/vendata/cwng/TRANSFAC/2010_transfac_vert_all_filtic9_sym_clus4.tamo
+theme_markov=/nfs/data/cwng/chipseq/hypotheses/Mouse.markov
+
+[mm8]
+description=UCSC mm8 (March '07 build) with full TRANSFAC hypothesis set
+genome=mm8
+genome_dir=/nfs/genomes/mouse_gp_mar_06
+genome_size=2107000000
+ucsc_chrom_sizes=%(genome_dir)s/%(genome)s.chrom.sizes
+refgene_anno_path=%(genome_dir)s/anno/refFlat-2010-08-26.txt
+annotation_path=%(refgene_anno_path)s
+known_gene_anno_path=%(genome_dir)s/anno/knownGene-2010-08-26.txt
+known_gene_xref_path=%(genome_dir)s/anno/kgXref-2010-08-26.txt
+affy_to_known_path=%(genome_dir)s/anno/knownToMOE430-2010-08-26.txt
+theme_hypotheses=/nfs/vendata/cwng/TRANSFAC/2010_transfac_vert_all_filtic9.tamo
+theme_markov=/nfs/data/cwng/chipseq/hypotheses/Mouse.markov
+
+# others...
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/org_settings.cfg.sample	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,39 @@
+# This file is used by org_settings.py to return sets of paths/settings like
+# genomic sequence files, genome sizes, etc.  It is formatted according to
+# Python's ConfigParser.ConfigParser specification:
+#
+# http://docs.python.org/library/configparser.html
+#
+# Before installation, add any system-specific settings to the categories below,
+# where categories correspond to organism/genome names, creating new category
+# headings where desired.
+#
+# User-specific organisms and settings may be specified in:
+#
+#   os.path.expanduser('~/.org_settings.cfg')
+#
+# with the same format.  Settings in user configuration files override system-wide
+# settings.
+#
+# A minimal organism configuration requires at least genome_dir and genome_size;
+# other settings may be required by different tools (e.g. theme_* for THEME.py).
+#
+# Field values must not contain spaces if they are to be exported to the command
+# line (i.e. with org_settings.py).
+
+[human]
+description=
+genome_dir=
+genome_size=
+annotation_path=
+theme_hyp=
+theme_markov=
+# others...
+
+[mouse]
+genome_dir=/nfs/genomes/mouse_gp_jul_07
+genome_size=2107000000
+annotation_path=%(genome_dir)s/anno/refFlat.txt
+theme_hyp=/nfs/vendata/cwng/motifs/TRANSFAC_vert_filt9_clus4_trunc.tamo
+theme_markov=/nfs/data/cwng/chipseq/hypotheses/Mouse.markov
+# others...
Binary file chipsequtil-master/scripts/._THEME.sh has changed
Binary file chipsequtil-master/scripts/._build_chipseq_infosite.py has changed
Binary file chipsequtil-master/scripts/._chipseq_pipeline.py has changed
Binary file chipsequtil-master/scripts/._chipseq_pipeline_wo_ctrl.py has changed
Binary file chipsequtil-master/scripts/._combine_gerald_stats.py has changed
Binary file chipsequtil-master/scripts/._compare_microarray_binding.py has changed
Binary file chipsequtil-master/scripts/._construct_bg_fasta.py has changed
Binary file chipsequtil-master/scripts/._create_pipeline_script.py has changed
Binary file chipsequtil-master/scripts/._extract_promoters.py has changed
Binary file chipsequtil-master/scripts/._filter_bed_by_position_count.py has changed
Binary file chipsequtil-master/scripts/._filter_gps_peaks.py has changed
Binary file chipsequtil-master/scripts/._filter_macs_peaks.py has changed
Binary file chipsequtil-master/scripts/._filter_mapped_known_genes.py has changed
Binary file chipsequtil-master/scripts/._generate_stats_doc.py has changed
Binary file chipsequtil-master/scripts/._gerald_stats.py has changed
Binary file chipsequtil-master/scripts/._gerald_to_bed.py has changed
Binary file chipsequtil-master/scripts/._integrate_macs_ucsc.py has changed
Binary file chipsequtil-master/scripts/._join_mapped_known_genes.py has changed
Binary file chipsequtil-master/scripts/._kg_to_gff.py has changed
Binary file chipsequtil-master/scripts/._map_intervals.py has changed
Binary file chipsequtil-master/scripts/._map_peaks_to_genes.py has changed
Binary file chipsequtil-master/scripts/._map_peaks_to_known_genes.py has changed
Binary file chipsequtil-master/scripts/._motif_scan.py has changed
Binary file chipsequtil-master/scripts/._nibFrag.py has changed
Binary file chipsequtil-master/scripts/._org_settings.py has changed
Binary file chipsequtil-master/scripts/._peaks_to_fasta.py has changed
Binary file chipsequtil-master/scripts/._plot_peak_loc_dist.py has changed
Binary file chipsequtil-master/scripts/._plot_pos_vs_neg_peaks.py has changed
Binary file chipsequtil-master/scripts/._probeset_to_known_gene.py has changed
Binary file chipsequtil-master/scripts/._rejection_sample_fasta.py has changed
Binary file chipsequtil-master/scripts/._sort_bed.py has changed
Binary file chipsequtil-master/scripts/._split_file.py has changed
Binary file chipsequtil-master/scripts/._split_qsub.py has changed
Binary file chipsequtil-master/scripts/._wait_for_jobid.py has changed
Binary file chipsequtil-master/scripts/._wait_for_qsub.py has changed
Binary file chipsequtil-master/scripts/._wqsub.py has changed
Binary file chipsequtil-master/scripts/._wqsub_drmaa.py has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/THEME.sh	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,177 @@
+#!/bin/bash
+
+THEME_EXE=/nfs/data/cwng/archive/cvEM.64/THEME_edit.py
+
+OPT_SPEC='
+{
+"NAME": "THEME.sh",
+"DESC": "Run old THEME version",
+"ARGS": ["FG_FASTA","BG_FASTA","HYP_FN","MARKOV"],
+"OPTS": {
+    "CV":{"LONG":"--cv","DEFAULT":5,"TYPE":"int","HELP":"number of cross validation folds [default:%default]"},
+    "NOREFINE":{"LONG":"--no-refine","ACTION":"store_true","HELP":"do not run with refinement"},
+    "BETA":{"LONG":"--beta","DEFAULT":0.7,"TYPE":"float","HELP":"beta parameter to use [default:%default]"},
+    "DELTA":{"LONG":"--delta","DEFAULT":0.001,"TYPE":"float","HELP":"delta parameter to use [default:%default]"},
+    "RANDOMIZE":{"LONG":"--randomization","ACTION":"store_true","HELP":"run randomization"},
+    "MOTIF_FN":{"LONG":"--motif-file","DEFAULT":"dummy.out","HELP":"filename to write motif results to [default:%default]"},
+    "OUTPUT_FN":{"LONG":"--output-filename","DEFAULT":"dummy.txt","HELP":"filename to write motif results to [default:%default]"},
+    "RANDOM_FN":{"LONG":"--random-output","DEFAULT":"random.txt","HELP":"filename to write motif results to [default:%default]"},
+    "DUMP":{"LONG":"--dump","ACTION":"store_true","HELP":"dump categtories to file"},
+    "REM_COM":{"LONG":"--remove-common","ACTION":"store_true","HELP":"remove common sequences from analysis"},
+    "NOPARALLEL":{"LONG":"--no-parallelize","ACTION":"store_true","HELP":"do not use wqsub.py for parallelization"},
+    "INTERACTIVE":{"LONG":"--interactive","ACTION":"store_true","HELP":"run the script interactively"},
+    "HYP_INDS":{"LONG":"--hyp-indices","DEFAULT":"ALL","HELP":"0-based indices of hypotheses to run [default: %default]"},
+    "VERBOSE":{"SHORT":"-v","LONG":"--verbose","ACTION":"store_true","HELP":"print out the commands that are being run"},
+    "TRIALS":{"LONG":"--trials","HELP":"this option is here only for backwards compatibility with THEME.py"}
+    }
+}'
+OUTPUT=$(echo $OPT_SPEC | getopts.py --shell=bash -- $@)
+GETOPTS_RET=$?
+if [ $GETOPTS_RET -ne 0 ]; then
+    exit 1
+fi
+$OUTPUT
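+# evaluating getopts.py's output defines the option variables used below
+# (CV, BETA, RANDOMIZE, etc.)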
+
+INTERACTIVE_FLAG="--auto"
+if [ $INTERACTIVE != "None" ]; then
+    INTERACTIVE_FLAG=
+fi
+
+eval "$(steplist.py $INTERACTIVE_FLAG -t "Run THEME" THEME "Wait for jobs" "Combine results")"
+
+# run THEME
+OUTDIR=THEME_data
+test \! -e $OUTDIR && mkdir $OUTDIR
+
+WQSUB_EXE="wqsub.py"
+if [ $NOPARALLEL != "None" ]; then
+    WQSUB_EXE=
+fi
+
+RANDOMIZE_FLAG=
+if [ $RANDOMIZE != "None" ]; then
+    RANDOMIZE_FLAG="-randomization"
+fi
+
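+# set RC to any non-empty value here to pass the -rc flag to THEME below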
+RC=
+if [ $RC ]; then
+    RC='-rc'
+fi
+
+if [ $HYP_INDS != "ALL" ]; then
+    HYP_INDS=$(parse_steplist.py $HYP_INDS)
+    HYP_INDS_STATUS=$?
+    if [ $HYP_INDS_STATUS != 0 ]; then
+        echo "Incorrectly formatted argument to --hyp-indices option, aborting"
+        exit $HYP_INDS_STATUS
+    fi
+else
+    NUM_HYPS=`grep -c '^Source' $HYP_FN`
+    NUM_HYPS=$(($NUM_HYPS-1))
+    HYP_INDS=$(seq 0 $NUM_HYPS)
+fi
+
+JOBIDS=
+next_step && \
+for i in $HYP_INDS
+do
+
+    WQSUB=
+    REDIRECT=
+    if [ ! -z $WQSUB_EXE ]; then
+        WQSUB="$WQSUB_EXE --wqsub-name=THEME_$i"
+    fi
+
+    OUTPRE=$OUTDIR/$i
+
+    CMD="$WQSUB python $THEME_EXE $FG_FASTA $BG_FASTA $i \
+        -fse $HYP_FN -markov $MARKOV -cv $CV -beta $BETA \
+        -delta $DELTA -motif_file $OUTPRE.tamo -out_file $OUTPRE.txt \
+        $RC"
+    JOBID=$($WQSUB $CMD)
+    JOBIDS="$JOBID $JOBIDS"
+    if [ $VERBOSE != "None" ]; then
+        echo $WQSUB $CMD
+    fi
+
+    if [ $RANDOMIZE != "None" ]; then
+
+        WQSUB="$WQSUB_EXE --wqsub-name=THEME_rand_$i"
+
+        CMD="$WQSUB python $THEME_EXE $FG_FASTA $BG_FASTA $i \
+            -fse $HYP_FN -markov $MARKOV -cv $CV -beta $BETA \
+            -delta $DELTA -out_file ${OUTPRE}_rand_output.txt \
+            -random_file ${OUTPRE}_rand.txt $RC -randomization"
+
+        JOBID=$($WQSUB $CMD)
+        JOBIDS="$JOBID $JOBIDS"
+
+        if [ $VERBOSE != "None" ]; then
+            echo $WQSUB $CMD
+        fi
+    fi
+
+done
+
+
+# wait for jobs
+next_step && wait_for_jobid.py $JOBIDS
+
+# compile results
+next_step
+DO_COMPILE=$?
+if [ $DO_COMPILE == 0 ]; then
+
+    rm -f $MOTIF_FN && touch $MOTIF_FN
+    (
+        cd $OUTDIR
+        ls *.tamo | sort -n | xargs -n1 -I{} -t cat {} >> ../$MOTIF_FN
+    )
+
+    if [ $NOPARALLEL == "None" ]; then
+        mv -f *.{err,out} THEME_data
+    fi
+
+    if [ $RANDOMIZE != "None" ]; then
+        rm -f $RANDOM_FN && touch $RANDOM_FN
+        (
+            cd $OUTDIR
+            for ind in $HYP_INDS
+            do
+                out_fn="${ind}_rand.txt"
+                echo "Consolidating $out_fn"
+                python >> ../$RANDOM_FN << EOF
+import re
+import sys
+
+from TAMO.MotifTools import load
+
+ind = re.match('(\d+)',"$out_fn").group(1)
+
+motif = load("$HYP_FN")[int(ind)]
+
+src = motif.source.split()
+if len(src) == 0 :
+    print 'Got weird motif source: %s\n'%src
+src = src[0]+'_%s'%ind
+
+mot_str = str(motif)
+
+cverrs = []
+for l in open("$out_fn") :
+    m = re.match("trial: \d+ mean test error: (\d+\.\d+)$",l)
+    if m is not None :
+         cverrs.append(float(m.group(1)))
+
+print "\t".join([src,mot_str,str(sum(cverrs)/len(cverrs)),repr(cverrs)])
+sys.stdout.flush()
+
+EOF
+            done
+
+        )
+
+    compile_THEME_results.py $MOTIF_FN $RANDOM_FN --output=$OUTPUT_FN
+
+    fi
+fi
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/build_chipseq_infosite.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,675 @@
+#!/usr/bin/env python
+
+import getpass
+import glob
+import json
+import matplotlib
+matplotlib.use('AGG')
+import matplotlib.pyplot as mp
+import os
+import re
+import shutil
+import sys
+
+from collections import defaultdict
+from csv import reader, writer, DictReader
+from math import log
+from optparse import OptionParser
+from subprocess import call
+
+from chipsequtil import MACSFile, get_org_settings
+from reStUtil import *
+
+usage = '%prog [options] [<peak filename> <peak filename> ...]'
+parser = OptionParser(usage=usage)
+parser.add_option('-d','--dir',dest='dir',default='.',help='Source directory [default: %default]')
+parser.add_option('-n','--name',dest='name',help='Experiment name [default: current directory name]')
+parser.add_option('--skip-motif-scan',dest='skip_motif_scan',action='store_true',help="skip motif_scan.py, but still build motifs into document (assumes motif_scan.py was previously run)")
+parser.add_option('--skip-motif-stuff',dest='skip_motif_stuff',action='store_true',help="motif stuff takes a long time, manually skip it if no motif results are available or you don't care about them")
+
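+# for reference, the structure of the <experiment>_params.json file this
+# script consumes (a bare dict literal; evaluated and discarded at runtime)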
+{
+  "experiment path": "/nfs/antdata/analysis/100809_P/100809_St7-10ul-p300_degenhar_Fraenkel_L2_mm9_sorted.bed", 
+  "analysis path": "/net/ventral/nfs/people/labadorf/analysis/100809_P_St7_10ul", 
+  "stage url": "http://fraenkel.mit.edu/stage/labadorf", 
+  "peak files": {
+    "100809_P_St7_10ul_mfold10,30_pval1e-5": {
+      "total tags in control": 9331149, 
+      "total tags in treatment": 10064908, 
+      "Range for calculating regional lambda": "1000 bps and 10000 bps", 
+      "tag size": 35, 
+      "name": "100809_P_St7_10ul_mfold10,30_pval1e-5", 
+      "model fold": "10,30", 
+      "format": "BED", 
+      "tags after filtering in treatment": 5099883, 
+      "band width": 150, 
+      "Redundant rate in control": 0.40999999999999998, 
+      "Redundant rate in treatment": 0.48999999999999999, 
+      "effective genome size": 2110000000.0, 
+      "d": 145, 
+      "maximum duplicate tags at the same position in control": 1, 
+      "control file": "cntrl_6-3_sorted_filterbed.txt", 
+      "MACS version": "1.4.0beta", 
+      "ChIP-seq file": "exp_100809_St7-10ul-p300_degenhar_Fraenkel_L2_mm9_sorted.bed", 
+      "tags after filtering in control": 5481613, 
+      "maximum duplicate tags at the same position in treatment": 2, 
+      "pvalue cutoff": 1.0000000000000001e-05
+    }
+  }, 
+  "format": "BED", 
+  "FDR filter": "none", 
+  "experiment name": "100809_P_St7_10ul", 
+  "mapping type": "TSS", 
+  "pipeline args": {
+    "--filter-peaks-args": "--sort-by=pvalue --top=200", 
+    "--macs-args": "--mfold=10,30 --tsize=35 --bw=150 --format=BED --pvalue=1e-5", 
+    "--map-args": "--tss --upstream-window=10000 --downstream-window=10000"
+  }, 
+  "org": "mm9", 
+  "control path": "/nfs/antdata/analysis/090828_42JVC/6-3/6-3_sorted_filterbed.txt", 
+  "mapping window": [
+    "10000", 
+    "10000"
+  ], 
+  "peaks used by THEME": "200", 
+  "stage_dir": "/nfs/antdata/web_stage/labadorf"
+}
+
+if __name__ == '__main__' :
+
+    opts, args = parser.parse_args(sys.argv[1:])
+
+    exp_dir = os.path.abspath(opts.dir)
+    exp_name = opts.name if opts.name is not None else os.path.basename(exp_dir)
+
+    # 1. find the param JSON file
+    param_json_fn = glob.glob('*params.json')
+    if len(param_json_fn) == 0 :
+        sys.stderr.write('Could not find parameter file, building one as best I can\n')
+        curr_user = getpass.getuser()
+        json_d = {'analysis path':os.getcwd(),
+                  'stage url':'http://fraenkel.mit.edu/stage/'+curr_user,
+                  'stage dir':'/nfs/antdata/web_stage/'+curr_user
+                 }
+    else :
+        if len(param_json_fn) > 1 :
+            sys.stderr.write('Found more than one parameter file, picking the first one: %s\n'%','.join(param_json_fn))
+        param_json_fn = param_json_fn[0]
+        json_d = json.load(open(param_json_fn))
+
+    # 2. make a new directory to save all the stuff
+    infosite_dir_name = exp_name+'_infosite'
+    infosite_path = os.path.join(os.getcwd(),infosite_dir_name)
+    if not os.path.exists(infosite_path) :
+        os.mkdir(infosite_path)
+
+    infosite_img_path = os.path.join(infosite_path,'images')
+    if not os.path.exists(infosite_img_path) :
+        os.mkdir(infosite_img_path)
+
+    # 3. setup web staging directory
+    stage_dir_path = os.path.join(json_d['stage dir'],infosite_dir_name)
+    if not os.path.exists(stage_dir_path) :
+        os.symlink(infosite_path,stage_dir_path)
+
+    # 4. get the peaks files stats, don't want negative peaks
+    if len(args) == 0 :
+        peaks_fns = glob.glob('*_peaks.xls')
+        peaks_fns = filter(lambda x: 'negative' not in x,peaks_fns)
+    else :
+        peaks_fns = args
+    analysis_sets = []
+    peak_json = json_d['peak files'] = {}
+
+    # analyze all the peak files
+    for peak_fn in peaks_fns :
+        print 'processing:',peak_fn
+        macs_f = MACSFile(peak_fn)
+        peak_json[peak_fn] = macs_f.file_info
+
+        # positive peaks
+        peak_stats = defaultdict(list)
+        num_peaks = 0
+        pos_chr_dist = defaultdict(int)
+        for peak in macs_f :
+            pos_chr_dist[peak['chr']] += 1
+            peak_stats['length'].append(peak['length'])
+            peak_stats['tags'].append(peak['tags'])
+            peak_stats['pvalue'].append(peak['-10*log10(pvalue)'])
+            peak_stats['fold_enrichment'].append(peak['fold_enrichment'])
+            peak_stats['fdr'].append(peak['FDR(%)'])
+            num_peaks += 1
+
+        peak_json[peak_fn]['positive peaks'] = num_peaks
+        peak_json[peak_fn]['reads under peaks'] = sum(peak_stats['tags'])
+
+        # extract paired peaks info out of output.txt
+        output_fn = peak_json[peak_fn]['name']+'_output.txt'
+        output_regexes = ('#2 number of (paired peaks): (\d+)',)
+        for l in open(output_fn) :
+            for regex in output_regexes :
+                m = re.search(regex,l)
+                if m is not None :
+                    peak_json[peak_fn][m.group(1)] = int(m.group(2))
+
+        # do the negative peaks
+        # negative peak file is now filtered
+        neg_peak_fns = glob.glob(peak_json[peak_fn]['name']+'_negative_peaks_*.xls')
+
+        # initialize unconditionally so the histogram/chromosome code below
+        # still works when no negative peak file is found
+        neg_peak_stats = defaultdict(list)
+        neg_chr_dist = defaultdict(int)
+
+        #TODO - do check for file exists
+        if neg_peak_fns :
+            neg_peak_fn = neg_peak_fns[0]
+            neg_peak_f = MACSFile(neg_peak_fn)
+
+            num_peaks = 0
+            for peak in neg_peak_f :
+                neg_chr_dist[peak['chr']] += 1
+                neg_peak_stats['length'].append(peak['length'])
+                neg_peak_stats['tags'].append(peak['tags'])
+                neg_peak_stats['pvalue'].append(peak['-10*log10(pvalue)'])
+                neg_peak_stats['fold_enrichment'].append(peak['fold_enrichment'])
+                neg_peak_stats['fdr'].append(peak['FDR(%)'])
+                num_peaks += 1
+
+            peak_json[peak_fn]['negative peaks'] = num_peaks
+            peak_json[peak_fn]['reads under negative peaks'] = sum(neg_peak_stats['tags'])
+        else :
+            peak_json[peak_fn]['negative peaks'] = 'NA'
+            peak_json[peak_fn]['reads under negative peaks'] = 'NA' 
+
+        # save the track lines
+        ucsc_track_fn = peak_json[peak_fn]['name']+'_MACS_wiggle_tracks.txt'
+        if os.path.exists(ucsc_track_fn) :
+            peak_json[peak_fn]['ucsc tracks'] = open(ucsc_track_fn).readlines()
+
+        font = {'size':'9'}
+        mp.rc('font',**font)
+
+        figsize = (3.5,3.5)
+        subplots_sizes = {'top':0.8,'left':0.15,'right':0.95}
+        hist_labels = ('+ peaks','- peaks')
+        # create histograms for each of the attributes
+        len_hist_name = macs_f.file_info['name']+'_length.png'
+        len_hist_fn = os.path.join(infosite_img_path,len_hist_name)
+        len_hist_url = json_d['stage url']+'/'+infosite_dir_name+'/images/'+len_hist_name
+        peak_json[peak_fn]['length distribution url'] = len_hist_url
+        mp.figure(figsize=figsize)
+        mp.subplots_adjust(**subplots_sizes)
+        mp.hist((peak_stats['length'],neg_peak_stats['length']),label=hist_labels,bins=20,log=True)
+        mp.title('%s\npeak length distribution'%macs_f.file_info['name'])
+        mp.xlabel('peak length')
+        mp.ylabel('# peaks')
+        mp.legend()
+        mp.savefig(len_hist_fn)
+        mp.clf()
+
+        tags_hist_name = macs_f.file_info['name']+'_tags.png'
+        tags_hist_fn = os.path.join(infosite_img_path,tags_hist_name)
+        tags_hist_url = json_d['stage url']+'/'+infosite_dir_name+'/images/'+tags_hist_name
+        peak_json[peak_fn]['tag distribution url'] = tags_hist_url
+        mp.figure(figsize=figsize)
+        mp.subplots_adjust(**subplots_sizes)
+        mp.hist((peak_stats['tags'],neg_peak_stats['tags']),label=hist_labels,bins=20,log=True)
+        mp.title('%s\npeak tag count distribution'%macs_f.file_info['name'])
+        mp.xlabel('# tags')
+        mp.ylabel('# peaks')
+        mp.legend()
+        mp.savefig(tags_hist_fn)
+        mp.clf()
+
+        pval_hist_name = macs_f.file_info['name']+'_pval.png'
+        pval_hist_fn = os.path.join(infosite_img_path,pval_hist_name)
+        pval_hist_url = json_d['stage url']+'/'+infosite_dir_name+'/images/'+pval_hist_name
+        peak_json[peak_fn]['pvalue distribution url'] = pval_hist_url
+        mp.figure(figsize=figsize)
+        mp.subplots_adjust(**subplots_sizes)
+        mp.hist((peak_stats['pvalue'],neg_peak_stats['pvalue']),label=hist_labels,bins=20,log=True)
+        mp.title('%s\npeak -10*log10(p-value) distribution'%macs_f.file_info['name'])
+        mp.xlabel('-10*log10(p-value)')
+        mp.ylabel('# peaks')
+        mp.legend()
+        mp.savefig(pval_hist_fn)
+        mp.clf()
+
+        fold_hist_name = macs_f.file_info['name']+'_fold.png'
+        fold_hist_fn = os.path.join(infosite_img_path,fold_hist_name)
+        fold_hist_url = json_d['stage url']+'/'+infosite_dir_name+'/images/'+fold_hist_name
+        peak_json[peak_fn]['fold distribution url'] = fold_hist_url
+        mp.figure(figsize=figsize)
+        mp.subplots_adjust(**subplots_sizes)
+        mp.hist((peak_stats['fold_enrichment'],neg_peak_stats['fold_enrichment']),label=hist_labels,bins=20,log=True)
+        mp.title('%s\npeak fold enrichment distribution'%macs_f.file_info['name'])
+        mp.xlabel('fold enrichment')
+        mp.ylabel('# peaks')
+        mp.legend()
+        mp.savefig(fold_hist_fn)
+        mp.clf()
+
+        fdr_hist_name = macs_f.file_info['name']+'_fdr.png'
+        fdr_hist_fn = os.path.join(infosite_img_path,fdr_hist_name)
+        fdr_hist_url = json_d['stage url']+'/'+infosite_dir_name+'/images/'+fdr_hist_name
+        peak_json[peak_fn]['fdr distribution url'] = fdr_hist_url
+        mp.figure(figsize=figsize)
+        mp.subplots_adjust(**subplots_sizes)
+        mp.hist(peak_stats['fdr'],label=hist_labels[0],bins=20,log=True)
+        mp.title('%s\npeak fdr distribution'%macs_f.file_info['name'])
+        mp.xlabel('fdr')
+        mp.ylabel('# peaks')
+        mp.legend()
+        mp.savefig(fdr_hist_fn)
+        mp.clf()
+
+        chr_dist_name = macs_f.file_info['name']+'_chr_dist.png'
+        chr_dist_fn = os.path.join(infosite_img_path,chr_dist_name)
+        chr_dist_url = json_d['stage url']+'/'+infosite_dir_name+'/images/'+chr_dist_name
+        peak_json[peak_fn]['chr distribution url'] = chr_dist_url
+        chromos = []
+        if json_d.has_key('org') :
+            chr_sizes_fn = get_org_settings(json_d['org'])['ucsc_chrom_sizes']
+            chromos = [r[0] for r in reader(open(chr_sizes_fn),delimiter='\t')]
+        else :
+            chromos = list(set(pos_chr_dist.keys()).union(neg_chr_dist.keys()))
+        standard_chromos = filter(lambda x: re.search('^chr[0-9MXY]+$',x) is not None,chromos)
+
+        # hack chrM, chrX and chrY so they sort right
+        if 'chrM' in standard_chromos :
+            standard_chromos[standard_chromos.index('chrM')] = 'chr100'
+        if 'chrX' in standard_chromos :
+            standard_chromos[standard_chromos.index('chrX')] = 'chr101'
+        if 'chrY' in standard_chromos :
+            standard_chromos[standard_chromos.index('chrY')] = 'chr102'
+
+        standard_chromos.sort(key=lambda x: int(x.replace('chr','')))
+
+        # unhack chrM, chrX and chrY so they display right
+        if 'chr100' in standard_chromos :
+            standard_chromos[standard_chromos.index('chr100')] = 'chrM'
+        if 'chr101' in standard_chromos :
+            standard_chromos[standard_chromos.index('chr101')] = 'chrX'
+        if 'chr102' in standard_chromos :
+            standard_chromos[standard_chromos.index('chr102')] = 'chrY'
+
+        other_chromos = filter(lambda x: re.search('^chr[0-9MXY]+$',x) is None,chromos)
+
+        pos_plot_chr_dist = defaultdict(int)
+        neg_plot_chr_dist = defaultdict(int)
+        for chrom in standard_chromos :
+            pos_plot_chr_dist[chrom] += pos_chr_dist.get(chrom,0)
+            neg_plot_chr_dist[chrom] += neg_chr_dist.get(chrom,0)
+        for chrom in other_chromos :
+            pos_plot_chr_dist['Other'] += pos_chr_dist.get(chrom,0)
+            neg_plot_chr_dist['Other'] += neg_chr_dist.get(chrom,0)
+        chromos = standard_chromos + ['Other']
+        mp.figure(figsize=figsize)
+        mp.subplots_adjust(bottom=0.18,**subplots_sizes)
+        mp.bar(range(len(chromos)),
+               [pos_plot_chr_dist[k] for k in chromos],
+               width=0.45,
+               color='b',
+               label='Positive'
+              )
+        mp.bar([x+0.45 for x in range(len(chromos))],
+               [neg_plot_chr_dist[k] for k in chromos],
+               width=0.45,
+               color='g',
+               label='Negative'
+              )
+        mp.xticks([x+0.45 for x in range(len(chromos))],chromos,rotation=90)
+        mp.title('%s\nPeaks by chromosome'%macs_f.file_info['name'])
+        mp.xlabel('Chromosome')
+        mp.ylabel('# peaks')
+        mp.legend()
+        mp.savefig(chr_dist_fn)
+        mp.clf()
+
+        # pos vs neg peaks (only produced when a negative peak file was found)
+        if neg_peak_fns :
+            pos_v_neg_name = '%s_pos_v_neg.png'%macs_f.file_info['name']
+            pos_v_neg_fn = os.path.join(infosite_img_path,pos_v_neg_name)
+            pos_v_neg_url = json_d['stage url']+'/'+infosite_dir_name+'/images/'+pos_v_neg_name
+            peak_json[peak_fn]['pos v neg url'] = pos_v_neg_url
+            cmd = 'plot_pos_vs_neg_peaks.py --output=%s %s %s'%(pos_v_neg_fn,peak_fn,neg_peak_fn)
+            sys.stderr.write(cmd+'\n')
+            r = call(cmd,shell=True)
+
+        # motif stuff
+        if opts.skip_motif_scan or opts.skip_motif_stuff :
+            sys.stderr.write('Obediently skipping motif stuff\n')
+        else :
+            # not exactly sure the best way to find the filtered macs file yet,
+            # just take the .xls file with the longest filename?
+            filtered_peak_fns = glob.glob('%s_peaks_*'%macs_f.file_info['name'])
+            filtered_peak_fns.sort(key=lambda x: len(x),reverse=True)
+            filtered_peak_fn = filtered_peak_fns[0]
+
+            motif_results_fns = glob.glob('%s_motifs_beta*_cv*[0-9].tamo'%macs_f.file_info['name'])
+            motif_results_fn = motif_results_fns[0]
+            #TODO - do check for file exists
+
+            # motif_scan.py <org> <peak fn> <TAMO motif fn>
+            fixed_peak_width = ''
+            if json_d['fixed peak width'] != 'none' :
+                fixed_peak_width = '--fixed-peak-width=%s'%json_d['fixed peak width']
+
+            cmd = 'motif_scan.py %s --dir=%s/images/ %s %s %s'
+            cmd = cmd%(fixed_peak_width,infosite_dir_name,json_d['org'],filtered_peak_fn,motif_results_fn)
+            sys.stderr.write(cmd+'\n')
+            call(cmd,shell=True)
+
+        # plot_peaks_vs_motifs.py <peaks fn> <seq score fn> <bg score fn>
+
+
+    # 5. build reSt document
+    reSt_fn = exp_name+'_info.rst'
+    reSt_path = os.path.join(infosite_path,reSt_fn)
+    reSt_html_name = exp_name+'_info.html'
+    reSt_html_path = os.path.join(infosite_path,reSt_html_name)
+    reSt_url = json_d['stage url'] + '/' + infosite_dir_name + '/' + reSt_html_name
+    doc = ReStDocument(reSt_path)
+    doc.add(ReStSection("Infopage for %s"%exp_name))
+
+    # basic experiment stats table
+    ident = lambda x: x or 'unknown'
+    stat_key_labels_fmts = [
+                        ('org','Organism',ident),
+                        ('analysis path','Analysis Path',ident),
+                        ('experiment path','Experiment Path',ident),
+                        ('control path','Control Path',ident),
+                        ('format','Read Format',ident),
+                        ('FDR filter','FDR filter',ident),
+                        ('mapping type','Gene Mapping Type',ident),
+                        ('mapping window','Gene Mapping Window',lambda x: x and '-%s,%s'%tuple(x)),
+                        ('peaks used by THEME','Peaks used by THEME',ident)
+                       ]
+    stat_rows = [('**%s**'%label, fmt(json_d.get(key))) for key,label,fmt in stat_key_labels_fmts]
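+    # each row renders as e.g. ('**Organism**', 'mm9'): a bolded reST label
+    # next to the value (or 'unknown' when the key is missing)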
+    doc.add(ReStSimpleTable(None,stat_rows))
+
+    doc.add(ReStSection('MACS Peak File Stats',level=2))
+
+    # go through peak files
+    peak_recs = json_d['peak files']
+    fl_str = lambda x: x and '%.2g'%float(x)
+    stat_key_labels_fmts = [
+                        ('paired peaks','*paired peaks*',ident),
+                        ('positive peaks','*positive peaks*',ident),
+                        ('negative peaks','*negative peaks*',ident),
+                        ('reads under peaks','*reads under positive peaks*',ident),
+                        ('total tags in treatment','*Treatment Tags*',ident),
+                        ('tags after filtering in treatment','after filtering',ident),
+                        ('Redundant rate in treatment','redundancy rate',fl_str),
+                        ('maximum duplicate tags at the same position in treatment','max dup. tags',ident),
+                        ('total tags in control','*Control Tags*',ident),
+                        ('tags after filtering in control','after filtering',ident),
+                        ('Redundant rate in control','redundancy rate',fl_str),
+                        ('maximum duplicate tags at the same position in control','max dup. tags',ident),
+                        ('peak tag count filter','*Minimum peak tag count*',ident),
+                        ('d','*MACS d*',ident),
+                        ('band width','*band width*',ident),
+                        ('MACS version','*MACS version*',ident),
+                        ('pvalue cutoff','*p-value cutoff*',lambda x: x and '1e%d'%int(log(x,10))),
+                       ]
+
+    for peak_fn,peak_stats in peak_recs.items() :
+
+        # add the new section and stats table
+        doc.add(ReStSection(peak_fn,level=3))
+        stat_rows = [('*%s*'%label, fmt(peak_stats.get(key))) for key,label,fmt in stat_key_labels_fmts]
+        doc.add(ReStSimpleTable(None,stat_rows))
+
+        # link to the peaks file
+        peak_infosite_name = os.path.join(infosite_dir_name,peak_fn)
+        peak_infosite_path = os.path.abspath(peak_infosite_name)
+        peak_infosite_url = json_d['stage url'] + '/' + peak_infosite_name
+        call('cp %s %s'%(peak_fn,os.path.join(infosite_dir_name,peak_fn)),shell=True)
+        doc.add(ReStSimpleTable(None,[('**MACS Peaks File**','`%s`_'%peak_infosite_url)]))
+        doc.add(ReStHyperlink(peak_infosite_url,url=peak_infosite_url))
+
+        # UCSC track info
+        if peak_stats.has_key('ucsc tracks') :
+            ucsc_tbl = ReStSimpleTable(('**UCSC Genome Browser Track Lines**',),
+                                      [[x] for x in peak_stats['ucsc tracks']])
+            doc.add(ucsc_tbl)
+        else :
+            doc.add(ReStSimpleTable(None,[['UCSC integration was not enabled for this experiment']]))
+
+        # peak quality plots
+        img_tbl1 = ReStSimpleTable(None, [
+                    [
+                     ReStImage(peak_stats['pos v neg url'],options={'width':'600px','align':'center'}),
+                    ]
+                   ]
+                  )
+        doc.add(img_tbl1)
+
+        img_tbl2 = ReStSimpleTable(None, [
+                    [
+                     ReStImage(peak_stats['length distribution url'],options={'width':'250px','align':'center'}),
+                     ReStImage(peak_stats['tag distribution url'],options={'width':'250px','align':'center'}),
+                     ReStImage(peak_stats['pvalue distribution url'],options={'width':'250px','align':'center'})
+                    ],
+                    [
+                     ReStImage(peak_stats['fold distribution url'],options={'width':'250px','align':'center'}),
+                     ReStImage(peak_stats['fdr distribution url'],options={'width':'250px','align':'center'}),
+                     ReStImage(peak_stats['chr distribution url'],options={'width':'250px','align':'center'})
+                    ]
+                  ]
+                  )
+        doc.add(img_tbl2)
+
+        # gene info
+        gene_fn = peak_stats['name']+'_genes.txt'
+        gene_link = os.path.join(infosite_dir_name,gene_fn)
+        if not os.path.exists(gene_link) :
+            shutil.copyfile(gene_fn,gene_link)
+        gene_url = json_d['stage url']+'/'+gene_link
+
+        # gather other gene mapping stats; the gene file columns are:
+        #   knownGeneID, geneSymbol, chr, start, end, length, summit, tags,
+        #   -10*log10(pvalue), fold_enrichment, FDR(%), peak loc,
+        #   dist from feature, score, map type, map subtype
+
+        gene_reader = DictReader(open(gene_fn),delimiter='\t')
+        gene_stats = defaultdict(set)
+        gene_pvals = defaultdict(float)
+        for rec in gene_reader :
+            gene_stats['num knownGenes'].add(rec['knownGeneID'])
+            gene_stats['num geneSymbols'].add(rec['geneSymbol'])
+            gene_pvals[rec['geneSymbol']] = max(gene_pvals[rec['geneSymbol']],float(rec['-10*log10(pvalue)']))
+        gene_pvals = gene_pvals.items()
+        gene_pvals.sort(key=lambda x: x[1],reverse=True)
+        for k,v in gene_pvals[:20]:
+            sys.stderr.write('%s %s\n'%(k,v))
+        gene_mapping_data = [('**# knownGenes mapped**',len(gene_stats['num knownGenes'])),
+                             ('**# gene symbols mapped**',len(gene_stats['num geneSymbols'])),
+                             ('**Top 10 gene symbols**',','.join([x[0] for x in gene_pvals[:10]])),
+                             ('**All gene mappings**','`%s`_'%gene_url)
+                            ]
+
+        # plots from plot_peak_loc_dist.py
+        gene_pie_name = exp_name+'_gene_map.png'
+        peak_pie_name = exp_name+'_peak_map.png'
+        hist_name = exp_name+'_peak_dist.png'
+        pval_bar_name = exp_name+'_pval_bar.png'
+        peak_loc_d = {'out_dir':infosite_path,
+                      'gene_pie_fn':os.path.join(infosite_path,'images',gene_pie_name),
+                      'peak_pie_fn':os.path.join(infosite_path,'images',peak_pie_name),
+                      'pval_bar_fn':os.path.join(infosite_path,'images',pval_bar_name),
+                      'hist_fn':os.path.join(infosite_path,'images',hist_name),
+                      'peak_fn':peak_fn,
+                      'gene_name':gene_fn
+                      }
+        cmd = 'plot_peak_loc_dist.py --save -d %(out_dir)s -g %(gene_pie_fn)s ' \
+              '-p %(peak_pie_fn)s -f %(hist_fn)s -b %(pval_bar_fn)s ' \
+              '%(peak_fn)s %(gene_name)s'
+        sys.stderr.write(cmd%peak_loc_d+'\n')
+        call(cmd%peak_loc_d,shell=True)
+        peak_stats['gene map url'] = json_d['stage url']+'/'+infosite_dir_name+'/images/'+gene_pie_name
+        peak_stats['peak map url'] = json_d['stage url']+'/'+infosite_dir_name+'/images/'+peak_pie_name
+        peak_stats['pval bar url'] = json_d['stage url']+'/'+infosite_dir_name+'/images/'+pval_bar_name
+        peak_stats['dist url'] = json_d['stage url']+'/'+infosite_dir_name+'/images/'+hist_name
+
+        # make links to the different peaks files
+        feature_patts = ('promoter.txt','gene_exon.txt','gene_intron.txt','after.txt','intergenic.xls')
+        feature_data = []
+        feature_urls = []
+
+        for patt in feature_patts :
+            feature_fn = '%s_*_%s'%(peak_stats['name'],patt)
+            feature_path = glob.glob(os.path.join(infosite_dir_name,feature_fn))
+            if len(feature_path) == 0 :
+                sys.stderr.write('Warning: %s could not be found, skipping feature type\n'%os.path.join(infosite_dir_name,feature_fn))
+                continue
+            feature_path = feature_path[0]
+            feature_url = json_d['stage url']+'/'+feature_path
+
+            # create UCSC formatted versions of the files (derive the output
+            # name from the real path, not the glob pattern in feature_fn)
+            feature_base = os.path.basename(feature_path)
+            if patt.endswith('.txt') : # these have gene columns
+                feature_type = patt.replace('.txt','')
+                ucsc_feature_fn = feature_base.replace('.txt','_ucsc.txt')
+                st,en = 2,4
+            elif patt.endswith('.xls') :
+                feature_type = patt.replace('.xls','')
+                ucsc_feature_fn = feature_base.replace('.xls','_ucsc.xls')
+                st,en = 0,2
+
+            ucsc_feature_path = os.path.join(infosite_dir_name,ucsc_feature_fn)
+            ucsc_feature_f = open(ucsc_feature_path,'w')
+            ucsc_feature_writer = writer(ucsc_feature_f,delimiter='\t')
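+            # collapse the chromosome/start/end columns (indices st..en)
+            # into a single UCSC-style "chr:start-end" position field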
+            for l in reader(open(feature_path),delimiter='\t') :
+                rec = l[0:st] + \
+                      ['%s:%s-%s'%tuple(l[st:en+1])] + \
+                      l[en+1:]
+                ucsc_feature_writer.writerow(rec)
+            ucsc_feature_f.close()
+
+            ucsc_feature_url = json_d['stage url']+'/'+ucsc_feature_path
+
+            feature_data.append(('**%s peaks**'%feature_type,'`%s`_ `UCSC %s`_'%(feature_url,feature_type)))
+            feature_urls.append(ReStHyperlink(feature_url,url=feature_url))
+            feature_urls.append(ReStHyperlink('UCSC %s'%feature_type,url=ucsc_feature_url))
+
+        gene_mapping_data.extend(feature_data)
+        feat_tbl = ReStSimpleTable(('**Gene mapping data**',''),gene_mapping_data)
+        doc.add(feat_tbl)
+        doc.add(ReStHyperlink(gene_url,url=gene_url))
+        for url in feature_urls :
+            doc.add(url)
+
+        img_tbl3 = ReStSimpleTable(None, [
+                    [
+                     ReStImage(peak_stats['gene map url'],options={'align':'center'}),
+                     ReStImage(peak_stats['peak map url'],options={'align':'center'})
+                    ],
+                    [
+                     ReStImage(peak_stats['pval bar url'],options={'align':'center'}),
+                     ReStImage(peak_stats['dist url'],options={'align':'center'})
+                    ]
+                   ]
+                   )
+        doc.add(img_tbl3)
+
+        # now put some motif stuff up there
+
+
+        if opts.skip_motif_stuff :
+            sys.stderr.write('Obediently skipping even more motif stuff\n')
+        else :
+            # THEME refines all motifs, display the top 30
+
+            # for now, just list a table of the top 30 significant, unrefined motifs
+            doc.add(ReStSection('%s Top 30 Refined Motif Results'%peak_stats['name'],level=3))
+            motif_results_fns = glob.glob('%s_motifs_beta*_cv*[0-9].txt'%macs_f.file_info['name']) #catRun_mfold10,30_pval1e-5_motifs_beta0.0_cv5.txt
+            #TODO - do check for file exists
+
+            motif_results_fn = motif_results_fns[0]
+
+            motif_reader = reader(open(motif_results_fn),delimiter='\t')
+
+            motif_header = motif_reader.next()
+            motif_data = []
+            top_n = 30
+            motif_fmts = (ident,ident,int,fl_str,fl_str,fl_str,fl_str,fl_str,fl_str)
+            motif_plot_urls = []
+            for rec in motif_reader :
+                motif_data.append([f(x) for f,x in zip(motif_fmts,rec)])
+                """
+                if rec[2] in motif_sig_inds_d.keys() :
+                    from_id = motif_sig_inds_d[rec[2]]
+                    try :
+                        old_id_fn = glob.glob(infosite_dir_name+'/images/*_%d_peakmot.png'%from_id)[0]
+                        new_id_fn = old_id_fn.replace('_%d_'%from_id,'_%s_'%rec[2])
+                        os.rename(old_id_fn,new_id_fn)
+                    except :
+                        sys.stderr.write("Couldn't rename file for pattern %s, just " \
+                                         "assuming its there\n"%(infosite_dir_name+'/images/*_%d_peakmot.png'%from_id))
+                """
+                new_id_fn = glob.glob(infosite_dir_name+'/images/*_%s_peakmot.png'%rec[2])[0]
+                motif_plot_urls.append(json_d['stage url']+'/'+new_id_fn)
+
+            doc.add(ReStSimpleTable(['**%s**'%x for x in motif_header],motif_data[:top_n]))
+
+            # create another file with the full table
+            motif_results_base, motif_results_ext = os.path.splitext(motif_results_fn)
+            motif_doc_fn = motif_results_base+'.rst'
+            motif_doc_path = os.path.join(infosite_path,motif_doc_fn)
+            motif_doc_html_fn = motif_results_base+'.html'
+            motif_doc_html_path = os.path.join(infosite_path,motif_doc_html_fn)
+            motif_doc_url = json_d['stage url']+'/'+infosite_dir_name+'/'+motif_doc_html_fn
+            motif_doc = ReStDocument(motif_doc_path)
+            motif_doc.add(ReStSection('%s Full Motif Results'%peak_stats['name']))
+            motif_doc.add('`Back to main infopage`_')
+            motif_doc.add(ReStSimpleTable(['**%s**'%x for x in motif_header],motif_data))
+            motif_doc.add('`Back to main infopage`_')
+            motif_doc.add(ReStHyperlink('Back to main infopage',url=reSt_url))
+            motif_doc.write()
+            motif_doc.close()
+            rst2html_call = 'rst2html.py --stylesheet-path=/nfs/antdata/web_stage/css/lsr.css ' \
+                            '%s %s'%(motif_doc_path,motif_doc_html_path)
+            sys.stderr.write(rst2html_call+'\n')
+            r = call(rst2html_call,shell=True)
+            doc.add('`All refined motifs`_')
+            doc.add(ReStHyperlink('All refined motifs',url=motif_doc_url))
+
+            # individual motif plots
+            plt_tbl = []
+            for i,url in enumerate(motif_plot_urls[:30]) :
+                if i%3 == 0 :
+                    plt_tbl.append([])
+                plt_tbl[-1].append(ReStImage(url))
+
+            doc.add(ReStSimpleTable(('**Peak strength vs refined motif strength**','(based on top 2000 peak sequences by pvalue)',''),plt_tbl))
+
+    doc.write()
+    doc.close()
+
+    # 6. convert reSt to PDF and HTML
+    rst2html_call = 'rst2html.py --stylesheet-path=/nfs/antdata/web_stage/css/lsr.css ' \
+                    '%s %s'%(reSt_path,reSt_html_path)
+    sys.stderr.write(rst2html_call+'\n')
+    r = call(rst2html_call,shell=True)
+
+    pdf_name = exp_name+'_info.pdf'
+    pdf_path = os.path.join(infosite_path,pdf_name)
+    r = call('rst2pdf %s -o %s'%(reSt_path,pdf_path),shell=True)
+
+    # 7. write out url to infosite
+    print json_d['stage url']+'/'+infosite_dir_name+'/'+reSt_html_name
+    open(infosite_dir_name+'_url.txt','w').write(json_d['stage url']+'/'+infosite_dir_name+'/'+reSt_html_name+'\n')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/chipseq_pipeline.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,331 @@
+#!/usr/bin/env python
+
+import os
+from subprocess import Popen, PIPE
+import string
+import sys
+from optparse import OptionParser, OptionGroup, SUPPRESS_HELP
+
+from pypeline import Pypeline, ProcessPypeStep as PPS, PythonPypeStep as PyPS, parse_steplist
+from chipsequtil import get_file_parts, get_org_settings
+from chipsequtil.util import MultiLineHelpFormatter
+from TAMO import MotifTools
+from TAMO.MD.THEME import parser as theme_parser
+
+usage = "%prog [options] <organism> <experiment alignment filename> [<control alignment filename>]"
+description = """1st generation ChIPSeq analysis pipeline:
+
+  - runs MACS to find peaks
+  - sorts peaks by p-value and isolates the top *n*
+  - maps peaks to genes
+  - extracts fasta files for gene peaks in experiments
+  - constructs background sequences matching foreground distribution
+  - runs THEME.py on input sequences w/ refinement
+  - builds an infosite with stats from this analysis
+
+Control input file is optional.  The *organism* argument is passed to the
+*org_settings.py* command to specify organism-specific parameters; ensure
+that the following commands return valid paths:
+
+If running MACS:
+ - org_settings.py <organism> genome_size
+ - org_settings.py <organism> genome_dir
+ - org_settings.py <organism> refgene_anno_path
+
+If running THEME:
+ - org_settings.py <organism> theme_hypotheses
+ - org_settings.py <organism> theme_markov
+
+"""
+
+epilog = """Note: it is advised to leave the --*-args arguments unchanged
+unless you really know what you're doing."""
+
+parser = OptionParser(usage=usage,description=description,epilog=epilog,formatter=MultiLineHelpFormatter())
+parser.add_option('--auto',dest='auto',action='store_true',help='run all steps non-interactively (for batch mode, e.g.)')
+parser.add_option('--steplist',dest='steplist',default='',help='with --auto, run specific steps')
+parser.add_option('--exp-name',dest='exp_name',default=os.path.basename(os.getcwd()),help='name for the experiment/pipeline, used for convenience [default: current directory name]')
+parser.add_option('--bed-args',dest='bed_args',default='--stdout --chromo-strip=.fa',help='double quote wrapped arguments for gerald_to_bed.py [default: %default]')
+#parser.add_option('--stats-args',dest='stats_args',default='',help='double quote wrapped arguments for gerald_stats.py [default: %default]')
+parser.add_option('--macs-exec',dest='macs_exec',default='macs14',help='the executable to use for MACS, if not an absolute path it needs to be on your shell environment path [default: %default]')
+parser.add_option('--macs-args',dest='macs_args',default='--pvalue=1e-5',help='double quote wrapped arguments for macs, only changing --mfold, --tsize, --bw, and --pvalue recommended [default: %default]')
+parser.add_option('--map-args',dest='map_args',default='--tss --upstream-window=10000 --downstream-window=10000',help='double quote wrapped arguments for mapping peaks to genes [default: %default]')
+parser.add_option('--filter-peaks-args',dest='filter_peaks_args',default="--sort-by=pvalue --top=1000 -f 'tags>20'",help='double quote wrapped arguments for filter_macs_peaks.py [default: %default]')
+parser.add_option('--filter-neg-peaks-args',dest='filter_neg_peaks_args',default="-f 'tags>20'",help='double quote wrapped arguments for filter_macs_peaks.py applied to negative peaks [default: %default]')
+parser.add_option('--peaks-to-fa-args',dest='peaks_to_fa_args',default='--fixed-peak-width=200',help='double quote wrapped arguments for peaks_to_fasta.py [default: %default]')
+parser.add_option('--bg-exec',dest='bg_exec',default='rejection_sample_fasta.py',help='the executable to use for generating background sequences for THEME, if not an absolute path it needs to be on your shell environment path [default: %default]')
+parser.add_option('--bg-args',dest='bg_args',default='--num-seq=2.1x',help='double quote wrapped arguments for background sequence generation utility [default: %default]')
+parser.add_option('--theme-args',dest='theme_args',default='--beta=0.7 --cv=5 --trials=25',help='double quote wrapped arguments for THEME.py [default: %default]')
+parser.add_option('--motif-pval-cutoff',dest='motif_pval',type='float',default=1e-5,help='the p-value cutoff for sending non-refined enriched motifs to THEME for refinement')
+parser.add_option('--parallelize',dest='parallelize',action='store_true',help='parallelize portions of the pipeline using qsub, only works from SGE execution hosts')
+parser.add_option('--ucsc',dest='ucsc',action='store_true',default=False,help='perform tasks for automated integration with UCSC genome browser [default:%default]')
+parser.add_option('--build-infosite-args',dest='infosite_args',default='',help='arguments to pass to build_chipseq_infosite.py [default: None]')
+
+ucsc_group = OptionGroup(parser,"UCSC Integration Options (with --ucsc)")
+ucsc_group.add_option('--stage-dir',dest='stage_dir',default='./',help='root directory where UCSC integration files should be made available [default: %default]')
+ucsc_group.add_option('--stage-url',dest='stage_url',default='http://localhost/',help='URL where UCSC integration files will be made available over the web [default: %default]')
+parser.add_option_group(ucsc_group)
+
+#parallel_group = OptionGroup(parser,"Parallelization Options (with --parallelize)",description="These options are relevant to parallelization of the pipeline, functionality is in beta status until further notice")
+#parallel_group.add_option('--split-args',dest='split_args',default='--type=count --arg=16',help='double quote wrapped arguments for split_file.py [default: %default]')
+#parallel_group.add_option('--qsub-args',dest='qsub_args',default='--die-on-err',help='double quote wrapped arguments for split_qsub.py [default: %default]')
+#parser.add_option_group(parallel_group)
+
+parser.add_option('--print-args',dest='print_args',action='store_true',help=SUPPRESS_HELP) # secret ninja option
+
+
+if __name__ == '__main__' :
+
+    # parse command line arguments
+    opts, args = parser.parse_args(sys.argv[1:])
+
+    # stick it up here, so when we print out args it's updated
+    if opts.ucsc and opts.macs_args.find('--wig') == -1 :
+        opts.macs_args += " --wig"
+
+    #  just print out all options as passed in for script generating purposes
+    if opts.print_args :
+        opts_strs = []
+        all_opts = []
+        all_opts.extend(parser.option_list)
+        for group in parser.option_groups :
+            all_opts.extend(group.option_list)
+        for opt in all_opts :
+            opt_str = opt.get_opt_string()
+            if opt_str in ['--help','--print-args'] :
+                pass
+            elif opt_str == '--steplist' and not opts.auto :
+                pass
+            #elif opt_str in ['--stage-dir','--stage-url'] and not opts.ucsc :
+            #    pass
+            #elif opt_str in ['--split-args','--qsub-args'] and not opts.parallelize :
+            #    pass
+            elif opt.action == 'store' :
+                arg = str(getattr(opts,opt.dest))
+                if arg.count(' ') > 0 or arg.find(' -') != -1 or arg.startswith('-') or arg.find('--') != -1 :
+                    opts_strs.append('    %s="%s"'%(opt.get_opt_string(),str(getattr(opts,opt.dest))))
+                else :
+                    opts_strs.append('    %s=%s'%(opt.get_opt_string(),str(getattr(opts,opt.dest))))
+            elif opt.action == 'store_true' and getattr(opts,opt.dest) :
+                opts_strs.append('    %s'%opt.get_opt_string())
+        opts_strs.append('    $@')
+        sys.stdout.write(' \\\n'.join(opts_strs)+'\n')
+        sys.exit(0)
+
+    if len(args) < 2 :
+        parser.error('Must provide two non-option arguments')
+
+    # filenames and paths
+    organism, experiment_fn = args[0:2]
+    control_fn = None
+    if len(args) > 2 :
+        control_fn = args[2]
+
+    org_settings = get_org_settings(organism)
+    refgene_fn = org_settings['refgene_anno_path']
+    kg_ref = org_settings['known_gene_anno_path']
+    kg_xref = org_settings['known_gene_xref_path']
+
+    exp_fpath,exp_fname,exp_fbase,exp_fext = get_file_parts(experiment_fn)
+    exp_wrk_dir = os.path.abspath('.exp_%s_%s'%(exp_fbase,opts.exp_name))
+
+    if control_fn :
+        cnt_fpath,cnt_fname,cnt_fbase,cnt_fext = get_file_parts(control_fn)
+        cnt_wrk_dir = os.path.abspath('.cnt_%s_%s'%(cnt_fbase,opts.exp_name))
+
+    # the pipeline
+    #log_fn = os.path.join(opts.exp_name+'_pipeline.log')
+    pipeline = Pypeline('Analysis pipeline for %s'%opts.exp_name)
+
+    steps = []
+
+    #if opts.parallelize :
+    #    # split up files
+    #    calls = ["mkdir %s"%exp_wrk_dir,
+    #             "split_file.py %s --outdir=%s %s"%(opts.split_args,exp_wrk_dir,experiment_fn),]
+    #    if control_fn :
+    #            calls.extend(["mkdir %s"%cnt_wrk_dir,
+    #             "split_file.py %s --outdir=%s %s"%(opts.split_args,cnt_wrk_dir,control_fn),
+    #            ])
+    #    steps.append(PPS('Split files',calls,env=os.environ))
+
+    ############################################################################
+    # run macs
+    ############################################################################
+    cnt_flag = ''
+    if control_fn :
+        cnt_flag = '-c %s'%control_fn
+
+    # parse macs_args so we can extract mfold and pvalue...in a rather silly way
+    macs_mfold = [x for x in opts.macs_args.split(' ') if 'mfold' in x]
+    macs_mfold = macs_mfold[0].split('=',1)[1] if len(macs_mfold) >= 1 else 'DEF'
+
+    macs_pvalue = [x for x in opts.macs_args.split(' ') if 'pvalue' in x]
+    macs_pvalue = macs_pvalue[0].split('=',1)[1] if len(macs_pvalue) >= 1 else 'DEF'
+    macs_name = opts.exp_name+'_mfold%s_pval%s'%(macs_mfold,macs_pvalue)
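+    # a sturdier alternative (sketch, needs `import re`) would pull the
+    # values out with a regex instead of splitting on whitespace, e.g.:
+    #   m = re.search(r'--mfold=(\S+)', opts.macs_args)
+    #   macs_mfold = m.group(1) if m else 'DEF'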
+
+    macs_peaks_fn = macs_name+'_peaks.xls'
+    macs_neg_peaks_fn = macs_name+'_negative_peaks.xls'
+    macs_screen_output_fn = macs_name+'_output.txt'
+
+    macs_d = {'exp_fn':experiment_fn,
+              'cnt_flag':cnt_flag,
+              'name':macs_name,
+              'macs_exec':opts.macs_exec,
+              'macs_args':opts.macs_args,
+              'macs_out':macs_screen_output_fn,
+              'gsize':org_settings['genome_size'],
+              }
+    calls = ["%(macs_exec)s --gsize=%(gsize)s -t %(exp_fn)s %(cnt_flag)s --name=%(name)s %(macs_args)s 2>&1 | tee %(macs_out)s"%macs_d]
+    steps.append(PPS('Run MACS',calls,env=os.environ))
+
+
+    ############################################################################
+    # process and stage wiggle files
+    ############################################################################
+    if opts.ucsc :
+        wiggle_dir = macs_name+'_MACS_wiggle'
+        ucsc_d = {'org':organism,
+                  'stage_dir':opts.stage_dir,
+                  'stage_url':opts.stage_url,
+                  'macs_dir':wiggle_dir,
+                 }
+
+        calls = ["integrate_macs_ucsc.py --auto %(org)s %(stage_dir)s %(stage_url)s %(macs_dir)s"%ucsc_d]
+        steps.append(PPS("UCSC Integration",calls))
+
+
+    ############################################################################
+    # map peaks to genes
+    ############################################################################
+    map_fn = "%s_genes.txt"%macs_name
+    map_stats_fn = "%s_genes_stats.xls"%macs_name
+    map_d = {'kg_ref':kg_ref,
+             'kg_xref':kg_xref,
+             'peaks_fn':macs_peaks_fn,
+             'bed_peaks_fn':macs_name+'_peaks.bed',
+             'map_fn':map_fn,
+             'map_stats_fn':map_stats_fn,
+             'map_args':opts.map_args
+            }
+    # make sure peak files don't have .fa at the end of their chromosomes
+    calls = ["sed -i 's/\.fa//g' %(peaks_fn)s %(bed_peaks_fn)s"%map_d]
+    c = "map_peaks_to_known_genes.py %(map_args)s --map-output=%(map_fn)s " + \
+         "--detail --stats-output=%(map_stats_fn)s %(kg_ref)s %(kg_xref)s " + \
+         "%(peaks_fn)s"
+    calls.append(c%map_d)
+    steps.append(PPS('Map peaks to genes',calls,env=os.environ))
+
+
+    ############################################################################
+    # filter macs peaks
+    ############################################################################
+    filtered_d = {'filter_peaks_args':opts.filter_peaks_args,
+                  'filter_neg_peaks_args':opts.filter_neg_peaks_args,
+                  'peaks_fn':macs_peaks_fn,
+                  'neg_peaks_fn':macs_neg_peaks_fn
+                 }
+    c = "filter_macs_peaks.py --print-encoded-fn --encode-filters " \
+        "%(filter_peaks_args)s %(peaks_fn)s"
+    filtered_peaks_fn = Popen(c%filtered_d,shell=True,stdout=PIPE).communicate()[0].strip()
+    filtered_neg_peaks_fn = macs_name + '_negative_peak_filt.xls'
+    calls = ["filter_macs_peaks.py --encode-filters %(filter_peaks_args)s %(peaks_fn)s"%filtered_d]
+    if control_fn is not None :
+         calls.append("filter_macs_peaks.py --encode-filters %(filter_neg_peaks_args)s %(neg_peaks_fn)s"%filtered_d)
+    steps.append(PPS('Filter MACS peaks',calls,env=os.environ))
+
+
+    ############################################################################
+    # THEME
+    ############################################################################
+    # extract foreground and generate background sequences
+    fg_fn = filtered_peaks_fn.replace('.xls','.fa')
+    fg_d = {'opts':opts.peaks_to_fa_args,
+            'organism':organism,
+            'fg_fn':fg_fn,
+            'peaks_fn':filtered_peaks_fn}
+    calls = ["peaks_to_fasta.py %(opts)s --output=%(fg_fn)s %(organism)s %(peaks_fn)s"%fg_d]
+    steps.append(PPS('Peaks to Fasta',calls,env=os.environ))
+
+    bg_fn = "%s_bg.fa"%macs_name
+    bg_d = {'opts':opts.bg_args,
+            'organism':organism,
+            'fg_fn':fg_fn,
+            'bg_fn':bg_fn}
+    calls = ["rejection_sample_fasta.py %(opts)s --output=%(bg_fn)s %(organism)s %(fg_fn)s"%bg_d]
+    steps.append(PPS('Generate Background Sequences',calls,env=os.environ))
+
+    # run THEME on fg
+    theme_opts, theme_args = theme_parser.parse_args(opts.theme_args.split(' '))
+    hyp_fn = org_settings['theme_hypotheses']
+    markov_fn = org_settings['theme_markov']
+
+    # run THEME w/ randomization by running each motif individually
+    # this is because TAMO.MD has a memory leak
+    raw_motif_fn = '%s_motifs_beta%s_cv%s.tamo'%(macs_name,theme_opts.beta,theme_opts.cv)
+    random_cv_fn = '%s_motifs_beta%s_cv%s_rand.txt'%(macs_name,theme_opts.beta,theme_opts.cv)
+
+    # new old THEME call
+    #Usage: THEME.sh [options] <FG_FASTA> <BG_FASTA> <HYP_FN> <MARKOV>
+    #
+    #Run old THEME version
+    #
+    #Options:
+    #  -h, --help            show this help message and exit
+    #  --hyp-indices=HYP_INDS
+    #                        0-based indices of hypotheses to run [default: ALL]
+    #  --no-refine           do not run with refinement
+    #  --no-parallelize      do not use wqsub.py for parallelization
+    #  -v, --verbose         print out the commands that are being run
+    #  --dump                dump categtories to file
+    #  --output-filename=OUTPUT_FN
+    #                        filename to write motif results to [default:dummy.txt]
+    #  --random-output=RANDOM_FN
+    #                        filename to write motif results to
+    #                        [default:random.txt]
+    #  --motif-file=MOTIF_FN
+    #                        filename to write motif results to [default:dummy.out]
+    #  --beta=BETA           beta parameter to use [default:0.7]
+    #  --delta=DELTA         delta parameter to use [default:0.001]
+    #  --remove-common       remove common sequences from analysis
+    #  --randomization       run randomization
+    #  --cv=CV               number of cross validation folds [default:5]
+    #  --interactive         run the script interactively
+
+    motif_fn = '%s_motifs_beta%s_cv%s.txt'%(macs_name,theme_opts.beta,theme_opts.cv)
+    theme_d = {'opts':opts.theme_args,
+               'fg_fn':fg_fn,
+               'bg_fn':bg_fn,
+               'hyp':hyp_fn,
+               'markov':markov_fn,
+               'tamo_motif_fn':raw_motif_fn,
+               'random_fn':random_cv_fn,
+               'motif_fn':motif_fn
+              }
+
+    theme_call = "THEME.sh %(opts)s %(fg_fn)s %(bg_fn)s %(hyp)s %(markov)s " \
+                 "--motif-file=%(tamo_motif_fn)s " \
+                 "--random-output=%(random_fn)s " \
+                 "--output-filename=%(motif_fn)s " \
+                 "--randomization"
+
+    calls = [theme_call%theme_d]
+    steps.append(PPS('Run THEME',calls,env=os.environ))
+
+    # build infosite
+    calls = ['build_chipseq_infosite.py %s'%opts.infosite_args]
+    steps.append(PPS('Build infosite',calls,env=os.environ))
+
+    # cleanup
+    rm_str = "rm -f %(d)s/*.out %(d)s/*.err %(d)s/*.script %(d)s/*.stats %(d)s/*.bed"
+    calls = [rm_str%{'d':exp_wrk_dir}]
+
+    if control_fn :
+         calls.append(rm_str%{'d':cnt_wrk_dir})
+    #steps.append(PPS('Clean up',calls,env=os.environ))
+
+    pipeline.add_steps(steps)
+    if opts.auto and opts.steplist :
+        steplist = parse_steplist(opts.steplist,pipeline)
+    else :
+        steplist = None
+    pipeline.run(interactive=not opts.auto,steplist=steplist)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/chipseq_pipeline_wo_ctrl.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,172 @@
+#!/usr/bin/env python
+
+import os
+import sys
+from optparse import OptionParser, OptionGroup
+
+from pypeline import Pypeline, ProcessPypeStep as PPS
+from chipsequtil import get_file_parts, get_org_settings
+from chipsequtil.util import MultiLineHelpFormatter
+
+usage = "%prog [options] <organism> <experiment GERALD alignment filename> [<control GERALD alignment filename>]"
+description = """1st generation ChIPSeq analysis pipeline:
+
+  - converts Illumina GERALD alignment files to BED format
+  - calculates statistics on input alignments
+  - runs MACS to find peaks
+  - maps peaks to genes
+  - extracts fasta files for gene peaks in experiments
+  - constructs background sequences matching foreground distribution
+  - runs THEME.py on input sequences
+  - runs THEME.py randomization
+  - creates documentation on entire pipeline run
+
+Control input file is optional.  The *organism* argument is passed to the
+*org_settings.py* command to specify organism-specific parameters; ensure
+that the following commands return valid paths:
+
+If running MACS:
+ - org_settings.py <organism> genome_size
+ - org_settings.py <organism> genome_dir
+ - org_settings.py <organism> annotation_path
+
+If running THEME:
+ - org_settings.py <organism> theme_hypotheses
+ - org_settings.py <organism> theme_markov
+
+"""
+
+epilog = """Note: it is advised to leave the --*-args arguments unchanged
+unless you really know what you're doing."""
+
+parser = OptionParser(usage=usage,description=description,epilog=epilog,formatter=MultiLineHelpFormatter())
+parser.add_option('--auto',dest='auto',action='store_true',help='run all steps non-interactively (for batch mode, e.g.)')
+parser.add_option('--exp-name',dest='exp_name',default=os.path.basename(os.getcwd()),help='name for the experiment/pipeline, used for convenience [default: current directory name]')
+parser.add_option('--split-args',dest='split_args',default='--type=count --arg=16',help='double quote wrapped arguments for split_file.py [default: %default]')
+parser.add_option('--bed-args',dest='bed_args',default='--stdout --chromo-strip=.fa',help='double quote wrapped arguments for gerald_to_bed.py [default: %default]')
+parser.add_option('--stats-args',dest='stats_args',default='',help='double quote wrapped arguments for gerald_stats.py [default: %default]')
+parser.add_option('--qsub-args',dest='qsub_args',default='--die-on-err',help='double quote wrapped arguments for split_qsub.py [default: %default]')
+parser.add_option('--macs-args',dest='macs_args',default='--mfold=10 --tsize=35 --bw=150 --pvalue=1e-5',help='double quote wrapped arguments for macs, only changing --mfold, --tsize, --bw, and --pvalue recommended [default: %default]')
+parser.add_option('--pk-to-fa-args',dest='pk_to_fa_args',default='--bg-type=rej_samp',help='double quote wrapped arguments for peaks_to_fasta.py [default: %default]')
+parser.add_option('--theme-args',dest='theme_args',default='--beta=0.7 --cv=5',help='double quote wrapped arguments for THEME.py [default: %default]')
+
+
+if __name__ == '__main__' :
+
+    # parse command line arguments
+    opts, args = parser.parse_args(sys.argv[1:])
+
+    if len(args) < 2 :
+        parser.error('Must provide at least two non-option arguments')
+
+    # filenames and paths; the control alignment file is optional
+    organism, experiment_fn = args[0:2]
+    control_fn = None
+    if len(args) > 2 :
+        control_fn = args[2]
+
+    org_settings = get_org_settings(organism)
+    refseq_fn = org_settings['annotation_path']
+
+    exp_fpath,exp_fname,exp_fbase,exp_fext = get_file_parts(experiment_fn)
+    exp_wrk_dir = os.path.abspath('.exp_%s_%s'%(exp_fbase,opts.exp_name))
+
+    if control_fn :
+        cnt_fpath,cnt_fname,cnt_fbase,cnt_fext = get_file_parts(control_fn)
+        cnt_wrk_dir = os.path.abspath('.cnt_%s_%s'%(cnt_fbase,opts.exp_name))
+
+    # the pipeline
+    pipeline = Pypeline()
+
+    steps = []
+
+    # split up files
+    calls = ["mkdir %s"%exp_wrk_dir,
+             "split_file.py %s --outdir=%s %s"%(opts.split_args,exp_wrk_dir,experiment_fn),]
+    if control_fn :
+            calls.extend(["mkdir %s"%cnt_wrk_dir,
+             "split_file.py %s --outdir=%s %s"%(opts.split_args,cnt_wrk_dir,control_fn),
+            ])
+    steps.append(PPS('Split files',calls,env=os.environ))
+
+    # convert to BED format
+    exp_bed_fn = "%s_exp.bed"%exp_fbase
+    calls = ["split_qsub.py %s --ext=.bed gerald_to_bed.py --util-args=\"%s\" %s/*.[0-9][0-9][0-9][0-9]"%(opts.qsub_args,opts.bed_args,exp_wrk_dir),
+             "wait_for_qsub.py",
+             "cat %s/*.bed > %s"%(exp_wrk_dir,exp_bed_fn),
+            ]
+
+    if control_fn :
+        cnt_bed_fn = "%s_cnt.bed"%cnt_fbase
+        calls.extend(["split_qsub.py %s --ext=.bed gerald_to_bed.py --util-args=\"%s\" %s/*.[0-9][0-9][0-9][0-9]"%(opts.qsub_args,opts.bed_args,cnt_wrk_dir),
+                      "wait_for_qsub.py",
+                      "cat %s/*.bed > %s"%(cnt_wrk_dir,cnt_bed_fn),
+                     ])
+
+    steps.append(PPS('Convert GERALD to BED format',calls,env=os.environ))
+
+    #steps.append(PPS('Helloooooooo nurse','echo Helloooooooo nurse'))
+    # generate alignment statistics
+    exp_stats_fn = '%s_stats.txt'%exp_fbase
+    calls = ["split_qsub.py %s --ext=.stats gerald_stats.py --util-args=\"%s\" %s/*.[0-9][0-9][0-9][0-9]"%(opts.qsub_args,opts.stats_args,exp_wrk_dir),
+             "wait_for_qsub.py",
+             "combine_gerald_stats.py %s/*.stats > %s"%(exp_wrk_dir,exp_stats_fn),
+            ]
+
+    if control_fn :
+        cnt_stats_fn = '%s_stats.txt'%cnt_fbase
+        calls.extend(["split_qsub.py %s --ext=.stats gerald_stats.py --util-args=\"%s\" %s/*.[0-9][0-9][0-9][0-9]"%(opts.qsub_args,opts.stats_args,cnt_wrk_dir),
+                 "wait_for_qsub.py",
+                 "combine_gerald_stats.py %s/*.stats > %s"%(cnt_wrk_dir,cnt_stats_fn),
+                ])
+    steps.append(PPS('Calculate alignment statistics',calls,env=os.environ))
+
+    # run macs
+    cnt_flag = ''
+    if control_fn :
+        cnt_flag = '-c %s'%cnt_bed_fn
+
+    macs_d = {'exp_fn':exp_bed_fn,
+              'cnt_flag':cnt_flag,
+              'name':opts.exp_name,
+              'macs_args':opts.macs_args,
+              'gsize':org_settings['genome_size'],
+              }
+    calls = ["macs --gsize=%(gsize)s -t %(exp_fn)s %(cnt_flag)s --name=%(name)s --format=BED %(macs_args)s"%macs_d]
+    steps.append(PPS('Run MACS',calls,env=os.environ))
+
+    # map peaks to genes
+    peaks_fn = "%s_peaks.bed"%opts.exp_name
+    map_fn = "%s_genes.txt"%opts.exp_name
+    map_stats_fn = "%s_genes_stats.txt"%opts.exp_name
+    calls = ["map_peaks_to_genes.py --peaks-format=BED %(refGene_fn)s %(peaks_fn)s --map-output=%(map_fn)s --stats-output=%(map_stats_fn)s"%{'refGene_fn':refseq_fn,'peaks_fn':peaks_fn,'map_fn':map_fn,'map_stats_fn':map_stats_fn}]
+    steps.append(PPS('Map peaks to genes',calls,env=os.environ))
+
+    # THEME
+    # extract foreground and generate background sequences
+    fg_fn = "%s_peaks.fa"%opts.exp_name
+    bg_fn = "%s_bg.fa"%opts.exp_name
+    nib_dir = org_settings['genome_dir']
+    calls = ["peaks_to_fasta.py %(opts)s --output=%(fg_fn)s --bg-fn=%(bg_fn)s %(organism)s %(peaks_fn)s"%{'opts':opts.pk_to_fa_args,'organism':organism,'fg_fn':fg_fn,'bg_fn':bg_fn,'peaks_fn':peaks_fn}]
+    steps.append(PPS('Peaks to Fasta',calls,env=os.environ))
+
+    # run THEME on fg
+    motif_fn = '%s_motifs.txt'%opts.exp_name
+    hyp_fn = org_settings['theme_hypotheses']
+    markov_fn = org_settings['theme_markov']
+    calls = ["THEME.py %(opts)s --motif-file=%(motif_fn)s %(fg_fn)s %(bg_fn)s %(hyp)s %(markov)s"%{'opts':opts.theme_args,'motif_fn':motif_fn,'fg_fn':fg_fn,'bg_fn':bg_fn,'hyp':hyp_fn,'markov':markov_fn}]
+    steps.append(PPS('Run THEME on foreground',calls,env=os.environ))
+
+    # run THEME randomization
+    random_motif_fn = '%s_motifs_rand.txt'%opts.exp_name
+    calls = ["THEME.py %(opts)s --randomization --motif-file=%(motif_fn)s %(fg_fn)s %(bg_fn)s %(hyp)s %(markov)s"%{'opts':opts.theme_args,'motif_fn':random_motif_fn,'fg_fn':fg_fn,'bg_fn':bg_fn,'hyp':hyp_fn,'markov':markov_fn}]
+    steps.append(PPS('Run THEME randomization',calls,env=os.environ))
+
+    # cleanup
+    rm_str = "rm -f %(d)s/*.out %(d)s/*.err %(d)s/*.script %(d)s/*.stats %(d)s/*.bed"
+    calls = [rm_str%{'d':exp_wrk_dir}]
+    if control_fn :
+        calls.append(rm_str%{'d':cnt_wrk_dir})
+    steps.append(PPS('Clean up',calls,env=os.environ))
+
+    pipeline.add_steps(steps)
+    pipeline.run(interactive=not opts.auto)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/combine_gerald_stats.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+
+import sys, re, os
+from optparse import OptionParser
+from collections import defaultdict as dd
+
+parser = OptionParser()
+
+if __name__ == '__main__' :
+
+    opts, args = parser.parse_args(sys.argv[1:])
+
+    all_stats = dd(int)
+    for fn in args :
+        d = eval(open(fn).read())
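+        # each stats file holds a Python dict literal; ast.literal_eval is
+        # a safer equivalent to eval here, e.g.:
+        #   from ast import literal_eval
+        #   d = literal_eval(open(fn).read())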
+        for k,v in d.items() :
+            all_stats[k] += v
+            all_stats['tot. aligns'] += v
+
+    keys = all_stats.keys()
+    keys.sort()
+    keys.remove('tot. aligns')
+
+    for k in keys :
+        print k,':',all_stats[k],'(%.4f)'%(float(all_stats[k])/all_stats['tot. aligns'])
+
+    print 'tot. aligns',':',all_stats['tot. aligns']
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/compare_microarray_binding.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+
+import sys
+
+from csv import reader, writer
+from collections import defaultdict as dd
+from optparse import OptionParser
+from subprocess import Popen, PIPE
+
+from chipsequtil import MACSOutput, BEDOutput, AffyBiocFile
+
+usage = '%prog -m <mapped MACS peaks file>|-b <mapped BED peaks file>|-a <mapped microarray file> [-m <MACS peaks file> ...] [-b <mapped BED peaks file> ...] [-a <mapped microarray file> ...]'
+description = """Join all files on the first column, concatenating records with \
+matching entries onto one line per entry.  Understands MACS peaks data as mapped \
+with *map_peaks_to_known_genes.py* utility microarray data as mapped by \
+*probeset_to_known_genes.py* utility, passed to program using *-m* and *-a* options \
+respectively. Output is a file where genes with binding data (MACS, BED files) have \
+column with a 1, 0 otherwise, and genes with microarray expression values have logFC \
+and adjusted p-value colums for each microarray file input. Internally, uses \
+*join_mapped_known_genes.py* with --binary-plus option to perform mapping and parses \
+output.  MACS fields are listed first, followed by BED fields, followed by microarray \
+fields."""
+
+epilog="Note: microarray files should have been created by bioconductor, and all files should have a row of fieldnames as the first line"
+parser = OptionParser(usage=usage,description=description,epilog=epilog)
+parser.add_option('-a','--affy-file',dest='affy_file',action='append',default=[],help='add a mapped microarray file')
+parser.add_option('-b','--bed-file',dest='bed_file',action='append',default=[],help='add a mapped BED formatted peaks (*.bed) file')
+parser.add_option('-m','--macs-file',dest='macs_file',action='append',default=[],help='add a mapped default MACS formatted peaks (*.xls) file')
+parser.add_option('--output',dest='output',default=None,help='file to output joined records to [default: stdout]')
+
+if __name__ == '__main__' :
+
+    opts,args = parser.parse_args(sys.argv[1:])
+
+    if len(args) > 0 :
+        parser.error('There were non-option command line arguments passed, all files should have a preceding option indicating filetype')
+
+    if len(opts.macs_file) == 0 and len(opts.bed_file) == 0 and len(opts.affy_file) == 0 :
+        parser.error('No files were passed in, aborting')
+
+    # call join_mapped_known_genes.py
+    fn_map = {}
+    fn_map['macs'] = ' '.join(['-m %s'%fn for fn in opts.macs_file])
+    fn_map['bed'] = ' '.join(['-b %s'%fn for fn in opts.bed_file])
+    fn_map['array'] = ' '.join(['-a %s'%fn for fn in opts.affy_file])
+    join_call = 'join_mapped_known_genes.py --binary-plus %(macs)s %(bed)s %(array)s'%fn_map
+    p = Popen(join_call, shell=True, stdout=PIPE,stderr=PIPE)
+    stdout, stderr = p.communicate()
+    if len(stderr) != 0 :
+        print stderr
+
+    joined_output = stdout.split('\n')
+    joined_output = joined_output[:-1] if joined_output[-1] == '' else joined_output
+
+    # determine which fields will end up in the file
+    header = joined_output[0].split('\t')
+
+    # always want gene and symbol
+    field_indices = [0,1]
+
+    # macs and bed fields are named by filename
+    for fn in opts.macs_file+opts.bed_file :
+        field_indices.append(header.index(fn))
+
+    # affy fields are index(fn)+5, index(fn)+8
+    for fn in opts.affy_file :
+        # just add all the microarray columns
+        fn_header_indices = [i for i,x in enumerate(header) if x.find(fn) != -1]
+        field_indices.extend(fn_header_indices)
+
+        #field_indices.append(header.index(fn))
+        #field_indices.append(header.index(fn)+5)
+        #field_indices.append(header.index(fn)+8)
+
+    out_f = open(opts.output,'w') if opts.output else sys.stdout
+    for line in joined_output :
+        line = line.split('\t')
+        out_f.write('\t'.join([line[i] for i in field_indices])+'\n')
+
+    if opts.output :
+        out_f.close()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/construct_bg_fasta.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,235 @@
+#!/usr/bin/env python
+
+import os
+import random
+import re
+import sys
+import warnings
+
+from collections import defaultdict
+from optparse import OptionParser
+
+import scipy
+
+from chipsequtil import get_org_settings, RefGeneFile
+from chipsequtil.nib import NibDB
+from chipsequtil.util import MultiLineHelpFormatter
+from TAMO.seq import Fasta
+
+usage='%prog [options] <type> <organism> <foreground fasta>'
+description='Create background sequence databases for motif finding, etc.'
+parser = OptionParser(usage=usage,description=description,formatter=MultiLineHelpFormatter())
+
+
+def rejection_sampling(fg,settings_dict,gc_bins=20) :
+    """Sample background sequences matching the foreground GC content and
+    TSS-distance distributions.  NOTE: unfinished -- currently only tallies
+    the total foreground size."""
+
+    genm_db = NibDB(settings_dict['genome_dir'])
+    annot = RefGeneFile(settings_dict['annotation_file'])
+
+    # total number of bases across the foreground sequences
+    num_peak_bases = 0
+    for header, seq in fg.items() :
+        num_peak_bases += len(seq)
+
+
+if __name__ == '__main__' :
+
+    opts, args = parser.parse_args(sys.argv[1:])
+
+    if len(args) < 3 :
+        parser.error('Must provide three non-option arguments')
+
+    sample_type, organism, fg_fn = args[:3]
+
+    settings_dict = get_org_settings(organism)
+
+    fg = Fasta.load(fg_fn)
+    bg = rejection_sampling(fg,settings_dict)
+
+
+###############################################################
+# start Chris' code from rej_samp_bg_rand2.py
+# NOTE: this block is not yet wired into the script above -- pos_seqs,
+# tss, Nseqs, bg, nibfrag and mean_sdev are carried over from the
+# original script and are undefined here
+    the_genes={} #list of distances to nearest TSS
+
+    # for each peak find the chromosome, distance to nearest
+    # gene, size of peaks in bases, and GC content
+    the_chrs,dists,sizes,gcs=[],[],[],[]
+
+    # number of bases in the fg sequences
+    size=0
+
+    for key in pos_seqs.keys():
+
+        size+=len(pos_seqs[key])
+
+        # chromosome first field in fasta headers from bed2seq.bedtoseq
+        chr=key.split(':')[0]
+
+        # adjust chromosomes in special cases (numeric encoding where
+        # chr20/chr21 stand in for chrX/chrY, e.g. in mouse)
+        if re.search('random',chr):
+            continue
+        if chr=='chr20':
+            chr='chrX'
+        elif chr=='chr21':
+            chr='chrY'
+        if not the_genes.has_key(chr):
+            the_genes[chr]=[]
+
+        # start first int in second field of bed2seq.bedtoseq header
+        start=int(key.split(':')[1].split('-')[0])
+        midpoint=int(start+len(pos_seqs[key])/2)
+
+        # figure out which chromosome we're working on
+        tss_chr=tss[chr.split('chr')[-1]]
+
+        # D is the distances from all the genes, find minimum
+        D=[(s[0]-midpoint) for s in tss_chr]
+
+        # best distance for this peak
+        minD=min([abs(x) for x in D])
+        best=[d for d in D if abs(d)==minD]
+        dists.append(best[0])
+
+        # chromosome for this peak
+        the_chrs.append(chr)
+        seq=pos_seqs[key]
+
+        # calculate # bases and GC content
+        N=len(seq)
+        sizes.append(N)
+        gc=float(len([x for x in seq if (x=='G')or(x=='C')]))/N
+        gcs.append(gc)
+
+    #bin GC content distribution
+    bins=20
+
+    # q is # of peaks w/ x% GC content
+    q=[0]*bins
+
+    for gc in gcs:
+        for i in range(bins):
+            win_start=float(i)/bins
+            win_end=float(i+1)/bins
+            if gc>=win_start and gc<win_end:
+                q[i]+=1
+                break
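+    # equivalent direct binning (sketch), avoiding the inner scan:
+    #   for gc in gcs: q[min(int(gc*bins), bins-1)] += 1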
+
+    # q is now % peaks w/ x% GC content
+    q=[float(x)/Nseqs for x in q]
+    #print q
+
+    # c is # peaks w/ highest GC content
+    c=max(q)*Nseqs
+
+    # start generating bg sequences
+    print "Done assembling distance and gc content distributions"
+    genome_outfile=open(bg,'w')
+
+    # make twice as many 
+    size=round(float(size)/(2*len(pos_seqs)))
+    bg_gcs,bg_sizes=[],[]
+    #for key in the_genes.keys():
+        #chrom=key.split('chr')[-1]
+        #the_genes[key]=[x[0] for x in tss[chrom]]
+
+    # C_TX is a list of all genes in (chromosome,gene start) tuples
+    C_TX=[]
+    for key in tss.keys():
+        chrom=key.split('chr')[-1]
+        for x in tss[chrom]:
+            C_TX.append((chrom,x[0]))
+
+    # generate a bg sequence for every fg sequence
+    for i in range(Nseqs):
+
+        # propose sequences until one is accepted
+        keep_going=1
+        while keep_going:
+            #random.shuffle(the_chrs)
+
+            # randomize the list of distances from genes
+            random.shuffle(dists)
+            #chr=the_chrs[0]
+
+            # pick the first distance, i.e. at random
+            d=dists[0]
+
+            #random.shuffle(the_genes[chr])
+
+            # randomize the gene list
+            random.shuffle(C_TX)
+
+            # randomize the peak sizes
+            random.shuffle(sizes)
+
+            # pick a random gene
+            (chr,coord)=C_TX[0]
+
+            #coord=the_genes[chr][0]
+            # propose a starting point for the bg sequence
+            midpoint=coord-d+random.randint(-100,100)
+
+            # propose a starting size for the bg sequence
+            size=sizes[0]
+            start=int(midpoint-int(size/2))
+            stop=int(midpoint+int(size/2))
+            id='chr'+chr.split('chr')[-1]+':'+str(start)+'-'+str(stop)
+            r=random.random()
+
+            # randomly choose strand
+            if r<0.5: strand='+'
+            else: strand='-'
+
+            # extract the proposed sequence
+            nib_title,seq=nibfrag.sequence('chr'+chr,start, stop,strand)
+            if not seq:
+                print 'NOT FOUND', chr,start,stop,
+                continue
+            else:
+
+                N,y=0,0
+                # calculate the GC content for the proposed sequence
+                for line in seq:
+                    s=line.upper()
+                    N+=len(line)
+                    y+=len([x for x in s if (x=='G')or(x=='C')])
+                x=float(y)/N
+
+                # determine the GC bin for this sequence
+                #gc=float(len([x for x in seq if (x=='G')or(x=='C')]))/N
+                for i in range(bins):
+                    win_start=float(i)/bins
+                    win_end=float(i+1)/bins
+                    if x>=win_start and x<win_end:
+                        bin=i
+                        break
+
+                # pick a uniform random number such that it does not exceed
+                # the maximum GC content distribution over bins
+                r=random.random()*c/Nseqs
+
+                # if the random number is <= the GC content for this
+                # proposed sequence, accept, otherwise reject
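+                # (equivalently: accept with probability q[bin]/max(q), so
+                #  accepted sequences follow the foreground GC distribution)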
+                if r>q[bin]:
+                    #print 'skip'
+                    continue
+                else:
+                    #print bin
+                    bg_gcs.append(x)
+                    bg_sizes.append(size)
+                    keep_going-=1
+                    title='>%s\n'%id
+                    genome_outfile.write(title)
+                    for line in seq:
+                        genome_outfile.write(line.upper()+'\n')
+    print len(gcs)
+    print len(bg_gcs)
+    fg_mean,fg_sdev=mean_sdev(gcs)
+    print fg_mean,fg_sdev
+    #bg_mean,bg_sdev=mean_sdev(bg_gcs)
+    bg_mean=scipy.mean(bg_gcs)
+    bg_sdev=scipy.std(bg_gcs)
+    print bg_mean,bg_sdev
+    fg_size_m,fg_size_dev=mean_sdev(sizes)
+    bg_size_m,bg_size_dev=mean_sdev(bg_sizes)
+    print fg_size_m,fg_size_dev
+    print bg_size_m,bg_size_dev
+    genome_outfile.close()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/create_pipeline_script.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,385 @@
+#!/usr/bin/env python
+
+from __future__ import with_statement
+import getpass
+import json
+import os
+import textwrap
+
+try:
+    import readline
+    import glob
+    readline.parse_and_bind("tab: complete")
+    readline.set_completer_delims('')
+
+    comp_states = {}
+    def basic_complete_file(text,state) :
+        #if text.strip() == '' :
+        #    text = './'
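+        # readline calls the completer with state=0,1,2,... until it
+        # returns None, so this yields each glob match in turn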
+        options = dict([(i,p) for i,p in enumerate(glob.glob(text+'*'))])
+        return options.get(state,None)
+
+    readline.set_completer(basic_complete_file)
+
+except ImportError:
+    print "Module readline not available."
+
+import re
+import stat
+import sys
+from optparse import OptionParser
+from subprocess import Popen, PIPE
+
+import chipsequtil
+from chipsequtil import get_global_settings, get_local_settings, check_org_settings, GLOBAL_SETTINGS_FN, LOCAL_SETTINGS_FN
+from terminalcontroller import TERM_ESCAPE, announce, warn, error, white, bold
+
+usage = "%prog"
+description = """Script for creating a custom run script for
+ChIPSeq/DNAse hypersensitivity experiments.  User is asked for
+paths and settings required for ChIPSeq analysis using the *chipseq_pipeline.py*
+utility and produces an executable run script with helpful information on how to
+run it.  Also creates a JSON formatted file containing all the parameters for
+this pipeline run."""
+epilog = "Note: this script only works in Unix-style environments"
+parser = OptionParser(usage=usage,description=description,epilog=epilog)
+
+
+script_template = """\
+#!/bin/bash
+
+# required parameters for the pipeline
+ORG=%(organism)s
+EXP_FN=%(exp_path)s
+CNT_FN=%(cnt_path)s
+
+# chipseq_pipeline.py is the main workhorse of this analysis
+# you may change any of the arguments below from their defaults
+
+chipseq_pipeline.py $ORG $EXP_FN $CNT_FN \\
+%(def_args)s
+"""
+
+start_text = """\
+This is an interactive script that creates an executable script to use for
+ChIPSeq analyses. When prompted for experiment and control files, tab
+completion is available a la bash or tcsh shells. Press Ctrl-C at any time to
+quit.
+"""
+
+end_text = """The script %(script_fn)s has been created to run this pipeline. \
+The script can now be run with:
+
+$> ./%(script_fn)s
+
+Have a nice day."""
+
+
+
+def wb(st) :
+    sys.stdout.write(white(bold(st)))
+
+
+def input(st,default=None) :
+
+    if default is None :
+        default_str = ''
+    else :
+        default_str = ' [default: ' + default + ' ] '
+
+    out = None
+    while out is None :
+        out = raw_input(white(bold(st))+default_str+white(bold(':'))+' \n')
+        if len(out) == 0 :
+            out = default
+
+    return out
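+# e.g. (hypothetical values) input('Experiment name','myexp') returns 'myexp'
+# if the user just presses enter, otherwise whatever non-empty text was typed;
+# with no default, an empty response re-prompts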
+
+
+if __name__ == '__main__' :
+
+    TERM_ESCAPE = True
+
+    try :
+
+        pipeline_args = {}
+
+        # herro
+        announce('ChIPSeq Experiment Pipeline Script Generator')
+        print textwrap.fill(start_text)
+
+        opts, args = parser.parse_args(sys.argv[1:])
+        if len(args) > 0 :
+            warn("Arguments were passed, but this script doesn't accept any arguments, rudely ignoring them...\n")
+
+        # this dictionary will be used to generate a JSON formatted file with
+        # all the relevant settings for the pipeline
+        json_dict = {}
+
+        ############################################################################
+        # name of the experiment
+        ############################################################################
+        def_path = os.path.basename(os.getcwd())
+        exp_name = input('Experiment name',def_path)
+        exp_name = exp_name.replace(' ','_') # shhhhhhhh...
+
+        json_dict['experiment name'] = exp_name
+        json_dict['analysis path'] = os.getcwd()
+
+        ############################################################################
+        # experiment and control file
+        ############################################################################
+        align_text = "The pipeline can accept either BED, BOWTIE, SAM, or " \
+        "ELANDEXPORT formatted alignment files. SAM is the default " \
+        "format of files provided by the BMC pipeline.  Both experiment " \
+        "and control files must have the same format."
+        print textwrap.fill(align_text)
+
+        align_fmt = input("Which format are the alignment files in?",'SAM')
+        exp_path = input('Experiment alignment path')
+        exp_path = exp_path.strip()
+
+        lims_exp_url = input('Experiment LIMS sample URL, if applicable','none')
+        lims_exp_url = lims_exp_url.strip()
+
+        cntrl_path = input('Control alignment path (leave blank for no control)','none')
+        cntrl_path = cntrl_path.strip()
+
+        lims_cntrl_url = input('Control LIMS sample URL, if applicable','none')
+        lims_cntrl_url = lims_cntrl_url.strip()
+
+        if cntrl_path == 'none' :
+            cntrl_path = ''
+
+        if cntrl_path == '' :
+            print 'Analysis will be run with no control'
+
+        json_dict['experiment path'] = os.path.realpath(exp_path)
+        json_dict['experiment lims url'] = lims_exp_url
+        json_dict['control path'] = os.path.realpath(cntrl_path) if cntrl_path != '' else 'none'
+        json_dict['control lims url'] = lims_cntrl_url
+
+        ############################################################################
+        # organism + settings
+        ############################################################################
+        announce('Organism settings configuration')
+        global_settings = get_global_settings()
+        local_settings = get_local_settings()
+        valid_org_settings = global_settings.keys() + local_settings.keys()
+        valid_org_settings.sort()
+
+        org_text = """\
+Below are the organism settings available on this system.  The pipeline will
+use the settings for one organism (e.g. %(org)s) for the entire execution. If
+you do not see a set of settings that corresponds to the files you need, you
+may add your own to %(local_org)s.  See %(glob_org)s for details.
+"""
+
+        print textwrap.fill(org_text%{'org':valid_org_settings[0],'local_org':LOCAL_SETTINGS_FN,'glob_org':GLOBAL_SETTINGS_FN},break_long_words=False)
+        print
+
+        wb('Available settings\n')
+        # global settings
+        print 'Global settings: (%s)'%GLOBAL_SETTINGS_FN
+        org_sets = [(k,global_settings[k]) for k in sorted(global_settings.keys())]
+        for org, settings in org_sets :
+            wb(org.ljust(8))
+            print ':', settings.get('description','No description')
+            #for k,v in settings.items() :
+            #    print ' '*4+k+": "+str(v)
+
+        # local settings
+        print 'Local settings: (%s)'%LOCAL_SETTINGS_FN
+        org_sets = [(k,local_settings[k]) for k in sorted(local_settings.keys())]
+        for org, settings in org_sets :
+            wb(org.ljust(8))
+            print ':', settings.get('description','No description')
+            #for k,v in settings.items() :
+            #    print ' '*4+k+": "+str(v)
+        org = ''
+        all_settings = {}
+        all_settings.update(global_settings)
+        all_settings.update(local_settings)
+
+        while org not in valid_org_settings :
+            org = input('Choose organism configuration, one of ('+','.join(valid_org_settings)+')')
+
+            # check for the required settings
+            required_settings = ['description','genome_dir','refgene_anno_path','theme_hypotheses','theme_markov']
+            if not check_org_settings(org,required_settings) :
+                warn(textwrap.fill('Selected organism settings must have the following settings defined:\n\
+                     %s\n\
+                     Either select another organism or define these settings in your local\
+                     configuration file.'%required_settings))
+                org = ''
+        print
+
+        json_dict['org'] = org
+
+        ############################################################################
+        # UCSC
+        ############################################################################
+
+        ucsc_text = """The pipeline can include a step to automatically make called
+peak data available on the web for integration with UCSC genome browser."""
+
+        print textwrap.fill(ucsc_text,break_long_words=False)
+
+        ucsc_integrate = input('Would you like to integrate this analysis with UCSC genome browser [y/n]?','y')
+        ucsc_integrate = False if ucsc_integrate == 'n' else True
+        ucsc_args = ''
+        stage_dir = '/nfs/antdata/web_stage/%s'%getpass.getuser()
+        stage_url = 'http://fraenkel.mit.edu/stage/%s'%getpass.getuser()
+        if ucsc_integrate :
+            ucsc_args = ['--ucsc']
+            ucsc_args = ' '.join(ucsc_args)
+
+        pipeline_args['--stage-dir'] = stage_dir
+        pipeline_args['--stage-url'] = stage_url
+
+        json_dict['stage dir'] = stage_dir
+        json_dict['stage url'] = stage_url
+
+        # TODO - consider letting user set these on script creation time
+        # any utility specific arguments?
+        #  - MACS
+        #  - THEME
+
+
+        ############################################################################
+        # various pipeline parameters
+        ############################################################################
+
+        # --macs-args
+        macs_args = ['--mfold=10,30','--format=%s'%align_fmt]
+        pval = ''
+        while not re.search('^\de-\d+$',pval) :
+            pval = input('What p-value should MACS use as a cutoff?','1e-5')
+        macs_args.append('--pvalue=%s'%pval)
+        pipeline_args['--macs-args'] = ' '.join(macs_args)
+
+        # --map-args
+        map_args = []
+        tss = ''
+        while tss.upper() not in ('TSS','GENE') :
+            tss = input('Should gene mapping be made in relation to transcription start site or full gene coordinates [TSS/gene]?','TSS')
+        if tss.upper() == 'TSS' :
+            map_args.append('--tss')
+
+        window = ''
+        while not re.search('^\d+,\d+$',window) :
+            window = input('What window would you like to use for mapping peaks to genes (upstream bases,downstream bases)?','10000,10000')
+        upstr, downstr = window.split(',')
+        map_args.extend(['--upstream-window=%s'%upstr,'--downstream-window=%s'%downstr])
+        pipeline_args['--map-args'] = ' '.join(map_args)
+
+        # --filter-peaks-args
+        filt_args =  ['--sort-by=pvalue']
+        fdr = ''
+        while not re.search('^\d+(\.\d+)?$',fdr) and fdr != 'none' :
+            fdr = input('What FDR cutoff should be used, in %?','none')
+        if fdr != 'none' :
+            filt_args.append("--filter='fdr<%s'"%fdr)
+
+        top = ''
+        while not re.search('^\d+$',top) and top != 'ALL' :
+            top = input('How many peak sequences should be used for motif discovery when sorted by p-value [<# peaks>/ALL]','1000')
+        if top != 'ALL' :
+            filt_args.append('--top=%s'%top)
+
+        # tag filter for both pos and neg peaks
+        tags = ''
+        filt_neg_args = []
+        while not re.search('^\d+$',tags) and tags != 'None' :
+            tags = input('What tag count cutoff should be used as a minimum for positive and negative peaks? [<# tags>/None]','20')
+        if tags != 'None' :
+            filt_args.append("--filter='tags>%s'"%tags)
+            filt_neg_args.append("--filter='tags>%s'"%tags)
+        pipeline_args['--filter-peaks-args'] = ' '.join(filt_args)
+        pipeline_args['--filter-neg-peaks-args'] = ' '.join(filt_neg_args)
+
+        # --peaks-to-fa-args
+        peaks_to_fa_args = []
+        width = ''
+        while not re.search('^\d+$',width) and width != 'NA' :
+            width = input('What width around peak summit should be used for motif analysis (NA to use entire peak)? [<# bases>/NA]','200')
+        if width != 'NA' :
+            peaks_to_fa_args.append('--fixed-peak-width=%s'%width)
+        else :
+            width = 'none'
+        pipeline_args['--peaks-to-fa-args'] = ' '.join(peaks_to_fa_args)
+
+        # --parallelize
+        parallel = input('Use cluster parallelization [y/n]?','y')
+        parallel = '--parallelize' if parallel.lower() != 'n' else ''
+
+        # each user-specified argument gets its own key
+        json_dict['format'] = align_fmt
+        json_dict['mapping type'] = tss
+        json_dict['mapping window'] = (upstr,downstr)
+        json_dict['FDR filter'] = fdr
+        json_dict['peaks used by THEME'] = top
+        json_dict['fixed peak width'] = width
+        json_dict['parallelize'] = parallel != ''
+        json_dict['peak tag count filter'] = tags
+
+        # put all the command line utility args in json_dict as its own dict
+        json_dict['pipeline args'] = pipeline_args
+
+        ############################################################################
+        # done with input, creating script and other stuff
+        ############################################################################
+        # if the experiment and control files are in a different directory,
+        # create symlinks for them
+        exp_dir,exp_fn = os.path.split(os.path.abspath(exp_path))
+        if exp_dir != os.getcwd() :
+            wb('Creating symlink for experiment file...\n')
+            if os.path.exists(exp_fn) :
+                if os.path.realpath(exp_fn) != os.path.abspath(exp_path) : # existing symlink  doesn't point to the same file, prompt to overwrite
+                    ans = raw_input('Symlink %s in current directory points to %s but you asked for %s, overwrite symbolic link? y/[n] '%(exp_fn,os.path.realpath(exp_fn),os.path.abspath(exp_path)))
+                    if ans == 'y' :
+                        os.remove(exp_fn)
+                        exp_fn = 'exp_'+exp_fn
+                        os.symlink(exp_path,exp_fn)
+            else :
+                exp_fn = 'exp_'+exp_fn
+                os.symlink(exp_path,exp_fn)
+
+        if cntrl_path != '' :
+            cntrl_dir,cntrl_fn = os.path.split(os.path.abspath(cntrl_path))
+            if cntrl_dir != os.getcwd() :
+                wb('Creating symlink for control file...\n')
+                if os.path.exists(cntrl_fn) :
+                    if os.path.realpath(cntrl_fn) != os.path.abspath(cntrl_path) : # existing symlink  doesn't point to the same file, prompt to overwrite
+                        ans = raw_input('Symlink %s in current directory points to %s but you asked for %s, overwrite symbolic link? y/[n] '%(cntrl_fn,os.path.realpath(cntrl_fn),os.path.abspath(cntrl_path)))
+                        if ans == 'y' :
+                            os.remove(cntrl_fn)
+                            cntrl_fn = 'cntrl_'+cntrl_fn
+                            os.symlink(cntrl_path,cntrl_fn)
+                else :
+                    cntrl_fn = 'cntrl_'+cntrl_fn
+                    os.symlink(cntrl_path,cntrl_fn)
+        else :
+            cntrl_fn = ''
+
+        # get default chipseq_pipeline.py args
+        pipeline_args = ' '.join(['%s="%s"'%(k,v) for k,v in pipeline_args.items()])
+        print 'chipseq_pipeline.py --exp-name=%s %s %s %s --print-args'%(exp_name,ucsc_args,parallel,pipeline_args)
+        def_args = Popen('chipseq_pipeline.py --exp-name=%s %s %s %s --print-args'%(exp_name,ucsc_args,parallel,pipeline_args),shell=True,stdout=PIPE,stderr=PIPE).communicate()[0]
+
+        wb('Creating script...\n')
+        script_fn = '%s_pipeline.sh'%exp_name
+        with open(script_fn,'w') as script_f :
+            script_f.write(script_template%{'exp_path':exp_fn,'cnt_path':cntrl_fn,'organism':org,'exp_name':exp_name,'def_args':def_args})
+            os.chmod(script_f.name,stat.S_IRWXU|stat.S_IRWXG|stat.S_IROTH)
+
+        print end_text%{'script_fn':script_fn}
+
+        wb('Creating parameter file...\n')
+        json_fn = '%s_params.json'%exp_name
+        with open(json_fn,'w') as json_f :
+            json.dump(json_dict,json_f,indent=4)
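+        # a hypothetical excerpt of the resulting <exp_name>_params.json:
+        # {
+        #     "experiment name": "exp1",
+        #     "org": "mm9",
+        #     "FDR filter": "none",
+        #     "pipeline args": {"--macs-args": "--mfold=10,30 --format=SAM --pvalue=1e-5"}
+        # }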
+
+    except KeyboardInterrupt :
+        sys.stderr.write('\n')
+        error('Script creation interrupted, aborting')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/extract_promoters.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+
+import re
+import sys
+from csv import writer
+from optparse import OptionParser
+
+from collections import defaultdict
+
+from chipsequtil import get_org_settings, RefGeneFile
+from chipsequtil.nib import NibDB
+from chipsequtil.util import MultiLineHelpFormatter as MF
+
+usage = "%prog [options] <organism>"
+description = """Extract the promoter sequences in FASTA format from all genes
+or a list of genes specified in an input file.  Gene annotation is RefGene
+corresponding to the organism passed in, paths returned by:
+
+$> org_settings.py <organism> refgene_anno_path
+$> org_settings.py <organism> genome_dir
+
+must be valid."""
+parser = OptionParser(usage=usage,description=description,formatter=MF())
+parser.add_option('-u','--upstream',type='int',default=3000,help='upstream window from TSS to extract [default: %default]')
+parser.add_option('-d','--downstream',type='int',default=1000,help='downstream window from TSS to extract [default: %default]')
+parser.add_option('-l','--gene-list',dest='gene_list',default=None,
+                  help='file containing a list of gene identifiers to extract, one per line [default: %default]')
+gene_type_choices = ['symbol','refgene']
+parser.add_option('-t','--gene-type',dest='gene_type',type='choice',
+                  choices=gene_type_choices,default=gene_type_choices[0],
+                  help='type of gene identifier in gene list, choose from %s [default: %%default]'%gene_type_choices)
+parser.add_option('-o','--output',dest='output',default=None,
+                  help='file to write fasta records to [default: stdout]')
+
+if __name__ == '__main__' :
+
+    opts, args = parser.parse_args(sys.argv[1:])
+
+    if len(args) != 1 :
+        parser.error('Exactly one argument is required')
+
+    org_settings = get_org_settings(args[0])
+
+    refgene_fn = org_settings['refgene_anno_path']
+    refgene_f = RefGeneFile(refgene_fn)
+
+    nib_db = NibDB(nib_dirs=[org_settings['genome_dir']])
+
+    gene_list = None
+    if opts.gene_list :
+        gene_list = [x.strip() for x in open(opts.gene_list).readlines()]
+
+    id_index = 'name' if opts.gene_type == 'refgene' else 'bin'
+
+    seq_recs = []
+    gene_map = defaultdict(list)
+    for rec in refgene_f :
+        if gene_list and rec[id_index] not in gene_list : continue # skip this one
+        # the TSS is txStart for '+' strand genes and txEnd for '-' strand
+        # genes, and upstream/downstream follow the direction of transcription
+        tss = int(rec['txStart']) if rec['strand'] == '+' else int(rec['txEnd'])
+        up, down = (opts.upstream,opts.downstream) if rec['strand'] == '+' else (opts.downstream,opts.upstream)
+        st, end = max(0,tss-up), min(tss+down,nib_db.db_info[rec['chrom']]['nbases'])
+        key = (rec['chrom'],st,end,rec['strand'])
+        seq_recs.append(key)
+        gene_map[key[:-1]].append(rec['bin']+'/'+rec['name'])
+
+    fasta_recs = nib_db.get_fasta_batch(seq_recs)
+
+    out_f = open(opts.output,'w') if opts.output else sys.stdout
+    header_regex = re.compile('^.*(chr[0-9MXY]+).*:([0-9]+)-([0-9]+).*$')
+    for header, seq in zip(*fasta_recs) :
+        # map sequences back to gene names using the header
+        reg_obj = header_regex.search(header)
+        if reg_obj is not None :
+            chrm,st,end = reg_obj.groups()
+            gene_names = gene_map.get((chrm,int(st),int(end)))
+            if gene_names is not None :
+                header = header.strip()+':'+','.join(gene_names)+'\n'
+        out_f.write(header+seq+'\n')
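+
+# e.g. (hypothetical invocation): extract_promoters.py -u 2000 -d 500 \
+#   -l genes.txt mm9 > promoters.fa
+# writes one FASTA record per promoter window, with mapped gene names
+# appended to each record's header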
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/filter_bed_by_position_count.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+
+import sys
+
+from csv import reader, writer
+from optparse import OptionParser
+
+usage = '%prog [options] <bed file>'
+description = """Analyze BED file and filter out alignments above some threshold \
+that align to a single genomic position."""
+epilog="Note: only works if BED file is sorted!"
+parser = OptionParser(usage=usage,description=description,epilog=epilog)
+parser.add_option('-n','--max-count',dest='max_count',default=5,type='int',help='max tag count at a given position, filter above [default: %default]')
+parser.add_option('--output',dest='output',default=None,help='write output to file')
+
+if __name__ == '__main__' :
+
+    opts, args = parser.parse_args(sys.argv[1:])
+
+    if len(args) != 1 :
+        parser.error('Exactly one sorted .bed file is required')
+
+    bed_fn = args[0]
+
+    bed_reader = reader(open(bed_fn),delimiter='\t')
+    out_f = open(opts.output,'w') if opts.output else sys.stdout
+    bed_writer = writer(out_f,delimiter='\t')
+
+    curr_key, curr_key_count = None, 0
+    for rec in bed_reader :
+        key = rec[:3] # chromosome, start, end
+        if key != curr_key :
+            curr_key, curr_key_count = key, 0
+        if curr_key_count < opts.max_count :
+            bed_writer.writerow(rec)
+            curr_key_count += 1
+        else :
+            continue
+    if opts.output : out_f.close()
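+
+# e.g. (hypothetical invocation): filter_bed_by_position_count.py -n 3 \
+#   sorted_tags.bed > filtered.bed
+# emits at most 3 records for any run of rows sharing (chrom,start,end)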
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/filter_gps_peaks.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,215 @@
+#!/usr/bin/env python
+
+import re
+import os
+import sys
+from collections import defaultdict
+from optparse import OptionParser, SUPPRESS_HELP
+from random import shuffle
+
+from chipsequtil import GPSFile, get_file_parts
+from chipsequtil.util import MultiLineHelpFormatter as MF
+from terminalcontroller import warn
+
+usage = "%prog [options] <GPS peak file>"
+description = """\
+Filter GPS peaks by supplied criteria.  Available filter features are:
+
+IP
+Control
+Fold
+qvalue
+pvalue
+IPvsEMP
+IPvsCTR
+
+Filters are provided as expressions using the [-f |--filter] option, e.g. the command
+
+%prog -f "IP>100" --filter="pvalue<=1e-9" <GPS peak file>
+
+finds only peaks with more than 100 IP tags and a pvalue of less than 1e-9.  Any 
+number of filters may be provided, and only peaks that match *all* filters pass. \
+User is warned if filters result in zero results.  Only inequality operators are \
+valid.  Invoking with no filter arguments returns all peaks.  To sort, use the \
+--sort-by option, e.g.
+
+%prog -f "pvalue<=1e-9" --sort-by=pvalue <GPS peak file>
+
+sorts peaks with a pvalue smaller than 1e-9, smallest pvalue peaks first.  All fields \
+are sorted ascending by default.  Output is prepended with comments describing what \
+the file contains, i.e. which filters are applied, how many records there are, etc.
+
+Note: GPS P_-lg10 and Q_-lg10 values are converted to normal pvalues and qvalues
+"""
+
+parser = OptionParser(usage=usage,description=description,formatter=MF())
+parser.add_option('-f','--filter',dest='filters',default=[],action='append',help='add filter expression')
+parser.add_option('--sort-by',dest='sort_by',default='',help='comma delimited list of features to sort by, filtered peaks are not sorted by default, if provided peaks are sorted ascending by default')
+parser.add_option('--sort-dir',dest='sort_dir',type='choice',choices=['ASCEND','DESCEND'],default='ASCEND',help='direction to sort [default: %default]')
+parser.add_option('--top',dest='top',type='int',default=None,help='accepts an integer, output at most this many peaks [default: all]')
+parser.add_option('--output',dest='output',default=None,help='filename to output filtered peaks to [default: stdout]')
+parser.add_option('--encode-filters',dest='encode_filters',action='store_true',help='write out records to a file <GPS peaks file>_<filters>.xls (incompatible with --output option)')
+parser.add_option('--summary',dest='summary',action='store_true',help='only print out summary information for the filter')
+parser.add_option('--no-header',dest='no_header',action='store_true',help='do not print out header or metadata info')
+parser.add_option('--shuffle',dest='shuffle',action='store_true',help='shuffle order of filtered records, useful for selecting random peaks')
+
+parser.add_option('--print-encoded-fn',dest='print_encoded_fn',action='store_true',help="print out the filename that would be created by --encode-filters")
+
+# make condition function objects using closures
+_lt = lambda x,y : x < y
+_lte = lambda x,y : x <= y
+_gt = lambda x,y : x > y
+_gte = lambda x,y : x >= y
+_cond_map = {'<':_lt,'<=':_lte,'>':_gt,'>=':_gte,None:None}
+
+def make_condition(low_val=None,low_test=_gte,high_val=None,high_test=_lte) :
+    if low_val and not high_val :
+        return lambda x: low_test(low_val,x)
+    elif not low_val and high_val :
+        return lambda x: high_test(x,high_val)
+    elif low_val and high_val :
+        return lambda x: low_test(low_val,x) and high_test(x,high_val)
+    else :
+        return lambda x: True # identity with no constraints
+
+# regex and function for parsing filter strings
+numeric_regex_str = r'\d+(?:\.\d*)?(?:(?:e|E)-?\d+)?' # matches numeric-looking patterns, e.g. 1, 1.234, 1e4, 1.234E-5, etc.
+separator_regex_str = r'(?:>|>=|<|<=)'
+ids_regex_str = r'(?:IP|Control|Fold|qvalue|pvalue|IPvsEMP|IPvsCTR)'
+filter_regex = re.compile('^(%(num)s)?(%(sep)s)?(%(id)s)(%(sep)s)?(%(num)s)?$'%{'num':numeric_regex_str,'sep':separator_regex_str,'id':ids_regex_str})
+
+class FilterException(Exception) : pass
+
+def parse_filter(filter_str) :
+    match = filter_regex.search(filter_str.strip())
+    if match is None :
+        raise FilterException('Filter %s is formatted incorrectly'%filter_str)
+    low_val, low_test, field, high_test, high_val = match.groups()
+    low_val = float(low_val) if low_val else low_val
+    high_val = float(high_val) if high_val else high_val
+    return field, make_condition(low_val,_cond_map[low_test],high_val,_cond_map[high_test])
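+# e.g. parse_filter('IP>100') returns ('IP',f) where f(x) is True iff x > 100,
+# and parse_filter('1<=Fold<=5') bounds Fold on both ends (illustrative values)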
+
+_sort_keys = {'length': lambda x: int(x[3]),
+              'tags': lambda x: int(x[5]),
+              'pvalue': lambda x: 10**(float(x[6])/-10),
+              'fold_enrichment': lambda x: float(x[7]),
+              'fdr': lambda x: float(x[8]),
+             }
+
+
+summary_str = """\
+# This output was generated by filter_gps_peaks.py, filtered from %(macs_fn)s
+# Number of peaks: %(num_recs)d
+# Filters: %(filters)s
+# Sorted by: %(sort_by)s
+# Shuffled: %(shuffled)s
+"""
+if __name__ == '__main__' :
+
+    opts, args = parser.parse_args(sys.argv[1:])
+
+    if len(args) < 1 :
+        parser.error('Must provide one GPS peaks file')
+
+    if opts.output is not None and opts.encode_filters :
+        parser.error('--output and --encode-filters options are mutually exclusive')
+
+    # set where to write output
+    if opts.encode_filters :
+        # construct filename additions
+        fn_str = ''
+        opts.filters.sort()
+        for filt in opts.filters :
+            filter_str = filt.replace(' ','')
+            filter_str = filter_str.replace('>=','_GTE_')
+            filter_str = filter_str.replace('<=','_LTE_')
+            filter_str = filter_str.replace('>','_GT_')
+            filter_str = filter_str.replace('<','_LT_')
+            fn_str += '_%s'%filter_str
+
+        if opts.top is not None :
+            fn_str += '_top%d'%opts.top
+
+        if len(opts.sort_by) != 0 :
+            fn_str += '_sortby_%s'%opts.sort_by
+
+        if opts.shuffle :
+            fn_str += '_shuffled'
+
+        macs_path,macs_fn,macs_basefn,macs_ext = get_file_parts(args[0])
+        encoded_fn = os.path.join(macs_path,macs_basefn+fn_str+macs_ext)
+        if opts.print_encoded_fn :
+            sys.stdout.write(encoded_fn)
+            sys.exit(0)
+        else :
+            out_f = open(encoded_fn,'w')
+    elif opts.output :
+        out_f = open(opts.output,'w')
+    else :
+        out_f = sys.stdout
+
+    # parse the filters
+    field_filters = defaultdict(list)
+    for filter in opts.filters :
+        field, filter_cond = parse_filter(filter)
+        field_filters[field].append(filter_cond)
+
+    # start processing GPS file
+    peaks = GPSFile(args[0])
+
+    # filter the records
+    pass_recs = []
+    for peak in peaks :
+        # test each of the fields, if any one fails skip the record
+        if not all([c(int(peak['IP'])) for c in field_filters['IP']]) or \
+           not all([c(int(peak['Control'])) for c in field_filters['Control']]) or \
+           not all([c(float(peak['Fold'])) for c in field_filters['Fold']]) or \
+           not all([c(10**(-float(peak['Q_-lg10']))) for c in field_filters['qvalue']]) or \
+           not all([c(10**(-float(peak['P_-lg10']))) for c in field_filters['pvalue']]) or \
+           not all([c(float(peak['IPvsEMP'])) for c in field_filters['IPvsEMP']]) or \
+           not all([c(float(peak['IPvsCTR'])) for c in field_filters['IPvsCTR']]) :
+           continue
+        else :
+            pass_recs.append([peak[k] for k in GPSFile.FIELD_NAMES])
+
+    if len(pass_recs) == 0 :
+        warn('WARNING: no records remain after filtering\n')
+        sys.exit(1)
+
+    # sorting
+    if opts.sort_by :
+        pass_recs.sort(key=_sort_keys[opts.sort_by],reverse=opts.sort_dir != 'ASCEND')
+
+    # top records
+    num_recs = len(pass_recs) if not opts.top else min(len(pass_recs),opts.top)
+
+    # construct the summary string
+    filters_str = 'none' if len(opts.filters) == 0  else ', '.join(opts.filters)
+    sort_str = 'original order' if not opts.sort_by else opts.sort_by+', '+opts.sort_dir
+    shuffled_str = str(opts.shuffle)
+    summary = summary_str%{'macs_fn':args[0],'num_recs':num_recs,
+                           'filters':filters_str,
+                           'sort_by':sort_str,
+                           'shuffled':shuffled_str}
+
+    # print summary only
+    if opts.summary :
+        sys.stdout.write(summary)
+        sys.exit(0)
+
+    # write out the header cuz it's a nice thing to do
+    if not opts.no_header :
+        out_f.write(summary)
+        out_f.write('\t'.join(GPSFile.FIELD_NAMES)+'\n')
+
+    # write out records
+    if opts.shuffle :
+        shuffle(pass_recs)
+    out_recs = pass_recs[:num_recs]
+
+    for rec in out_recs :
+        # rec[0] is a tuple of (chromosome,start pos,original string)
+        out_f.write('\t'.join([rec[0][2]]+map(str,rec[1:]))+'\n')
+
+    # good programming practice
+    out_f.close()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/filter_macs_peaks.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,210 @@
+#!/usr/bin/env python
+
+import re
+import os
+import sys
+from collections import defaultdict
+from optparse import OptionParser, SUPPRESS_HELP
+from random import shuffle
+
+from chipsequtil import MACSFile, MACSOutput, get_file_parts
+from chipsequtil.util import MultiLineHelpFormatter as MF
+from terminalcontroller import warn
+
+usage = "%prog [options] <MACS peak file>"
+description = """\
+Filter MACS peaks by supplied criteria.  Available filter features are:
+
+length
+tags
+pvalue
+fold_enrichment
+fdr
+
+Filters are provided as expressions using the [-f |--filter] option, e.g. the command
+
+%prog -f "tags>100" --filter="pvalue<=1e-9" --filter="100<length<=200" <MACS peak file>
+
+finds only peaks with more than 100 tags, a pvalue of less than 1e-9, and a length \
+between 100, exclusive, and 200, inclusive.  Any number of filters may be provided, \
+and only peaks that match *all* filters pass.  User is warned if filters result in \
+zero results.  Only inequality operators are valid.  Invoking with no filter arguments \
+returns all peaks.  To sort, use the --sort-by option, e.g.
+
+%prog -f "pvalue<=1e-9" --sort-by=pvalue <MACS peak file>
+
+sorts peaks with a pvalue smaller than 1e-9, smallest pvalue peaks first.  All fields \
+are sorted ascending by default.  Output is prepended with comments describing what \
+the file contains, i.e. which filters are applied, how many records there are, etc.
+
+Note: MACS -10*log10(pvalue) values are converted to normal pvalues
+"""
+
+parser = OptionParser(usage=usage,description=description,formatter=MF())
+parser.add_option('-f','--filter',dest='filters',default=[],action='append',help='add filter expression')
+parser.add_option('--sort-by',dest='sort_by',default='',help='comma delimited list of features to sort by, filtered peaks are not sorted by default, if provided peaks are sorted ascending by default')
+parser.add_option('--sort-dir',dest='sort_dir',type='choice',choices=['ASCEND','DESCEND'],default='ASCEND',help='direction to sort [default: %default]')
+parser.add_option('--top',dest='top',type='int',default=None,help='accepts an integer, output at most this many peaks [default: all]')
+parser.add_option('--output',dest='output',default=None,help='filename to output filtered peaks to [default: stdout]')
+parser.add_option('--encode-filters',dest='encode_filters',action='store_true',help='write out records to a file <MACS peaks file>_<filters>.xls (incompatible with --output option)')
+parser.add_option('--summary',dest='summary',action='store_true',help='only print out summary information for the filter')
+parser.add_option('--no-header',dest='no_header',action='store_true',help='do not print out header or metadata info')
+parser.add_option('--shuffle',dest='shuffle',action='store_true',help='shuffle order of filtered records, useful for selecting random peaks')
+
+parser.add_option('--print-encoded-fn',dest='print_encoded_fn',action='store_true',help="print out the filename that would be created by --encode-filters")
+
+# make condition function objects using closures
+_lt = lambda x,y : x < y
+_lte = lambda x,y : x <= y
+_gt = lambda x,y : x > y
+_gte = lambda x,y : x >= y
+_cond_map = {'<':_lt,'<=':_lte,'>':_gt,'>=':_gte,None:None}
+
+def make_condition(low_val=None,low_test=_gte,high_val=None,high_test=_lte) :
+    if low_val and not high_val :
+        return lambda x: low_test(low_val,x)
+    elif not low_val and high_val :
+        return lambda x: high_test(x,high_val)
+    elif low_val and high_val :
+        return lambda x: low_test(low_val,x) and high_test(x,high_val)
+    else :
+        return lambda x: True # identity with no constraints
+
+# regex and function for parsing filter strings
+numeric_regex_str = r'\d+(?:\.\d*)?(?:(?:e|E)-?\d+)?' # matches numeric-looking patterns, e.g. 1, 1.234, 1e4, 1.234E-5, etc.
+separator_regex_str = r'(?:>|>=|<|<=)'
+ids_regex_str = r'(?:tags|pvalue|fold_enrichment|fdr|length)'
+filter_regex = re.compile('^(%(num)s)?(%(sep)s)?(%(id)s)(%(sep)s)?(%(num)s)?$'%{'num':numeric_regex_str,'sep':separator_regex_str,'id':ids_regex_str})
+
+class FilterException(Exception) : pass
+
+def parse_filter(filter_str) :
+    match = filter_regex.search(filter_str.strip())
+    if match is None :
+        raise FilterException('Filter %s is formatted incorrectly'%filter_str)
+    low_val, low_test, field, high_test, high_val = match.groups()
+    low_val = float(low_val) if low_val else low_val
+    high_val = float(high_val) if high_val else high_val
+    return field, make_condition(low_val,_cond_map[low_test],high_val,_cond_map[high_test])
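+# e.g. parse_filter('tags>100') returns ('tags',f) where f(x) is True iff
+# x > 100, and parse_filter('100<length<=200') bounds length on both ends
+# (illustrative values)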
+
+_sort_keys = {'length': lambda x: int(x[3]),
+              'tags': lambda x: int(x[5]),
+              'pvalue': lambda x: 10**(float(x[6])/-10),
+              'fold_enrichment': lambda x: float(x[7]),
+              'fdr': lambda x: float(x[8]),
+             }
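+# MACS reports -10*log10(pvalue), so a reported score of 50 corresponds to
+# p = 10**(50/-10.) = 1e-5; the 'pvalue' sort key above undoes that transform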
+
+
+summary_str = """\
+# This output was generated by filter_macs_peaks.py, filtered from %(macs_fn)s
+# Number of peaks: %(num_recs)d
+# Filters: %(filters)s
+# Sorted by: %(sort_by)s
+# Shuffled: %(shuffled)s
+"""
+if __name__ == '__main__' :
+
+    opts, args = parser.parse_args(sys.argv[1:])
+
+    if len(args) < 1 :
+        parser.error('Must provide one MACS peaks file')
+
+    if opts.output is not None and opts.encode_filters :
+        parser.error('--output and --encode-filters options are mutually exclusive')
+
+    # set where to write output
+    if opts.encode_filters :
+        # construct filename additions
+        fn_str = ''
+        opts.filters.sort()
+        for filt in opts.filters :
+            filter_str = filt.replace(' ','')
+            filter_str = filter_str.replace('>=','_GTE_')
+            filter_str = filter_str.replace('<=','_LTE_')
+            filter_str = filter_str.replace('>','_GT_')
+            filter_str = filter_str.replace('<','_LT_')
+            fn_str += '_%s'%filter_str
+
+        if opts.top is not None :
+            fn_str += '_top%d'%opts.top
+
+        if len(opts.sort_by) != 0 :
+            fn_str += '_sortby_%s'%opts.sort_by
+
+        if opts.shuffle :
+            fn_str += '_shuffled'
+
+        macs_path,macs_fn,macs_basefn,macs_ext = get_file_parts(args[0])
+        encoded_fn = os.path.join(macs_path,macs_basefn+fn_str+macs_ext)
+        if opts.print_encoded_fn :
+            sys.stdout.write(encoded_fn)
+            sys.exit(0)
+        else :
+            out_f = open(encoded_fn,'w')
+    elif opts.output :
+        out_f = open(opts.output,'w')
+    else :
+        out_f = sys.stdout
+
+    # parse the filters
+    field_filters = defaultdict(list)
+    for filter in opts.filters :
+        field, filter_cond = parse_filter(filter)
+        field_filters[field].append(filter_cond)
+
+    # start processing MACS file
+    peaks = MACSFile(args[0])
+
+    # filter the records
+    pass_recs = []
+    for peak in peaks :
+        # test each of the fields, if any one fails skip the record
+        if not all([c(int(peak['length'])) for c in field_filters['length']]) or \
+           not all([c(int(peak['tags'])) for c in field_filters['tags']]) or \
+           not all([c(10**(float(peak['-10*log10(pvalue)'])/-10)) for c in field_filters['pvalue']]) or \
+           not all([c(float(peak['fold_enrichment'])) for c in field_filters['fold_enrichment']]) or \
+           not all([c(float(peak['FDR(%)'])) for c in field_filters['fdr']]) :
+           continue
+        else :
+            pass_recs.append([peak[k] for k in MACSOutput.FIELD_NAMES])
+
+    if len(pass_recs) == 0 :
+        warn('WARNING: no records remain after filtering\n')
+        sys.exit(1)
+
+    # sorting
+    if opts.sort_by :
+        pass_recs.sort(key=_sort_keys[opts.sort_by],reverse=opts.sort_dir != 'ASCEND')
+
+    # top records
+    num_recs = len(pass_recs) if not opts.top else min(len(pass_recs),opts.top)
+
+    # construct the summary string
+    filters_str = 'none' if len(opts.filters) == 0  else ', '.join(opts.filters)
+    sort_str = 'original order' if not opts.sort_by else opts.sort_by+', '+opts.sort_dir
+    shuffled_str = str(opts.shuffle)
+    summary = summary_str%{'macs_fn':args[0],'num_recs':num_recs,
+                           'filters':filters_str,
+                           'sort_by':sort_str,
+                           'shuffled':shuffled_str}
+
+    # print summary only
+    if opts.summary :
+        sys.stdout.write(summary)
+        sys.exit(0)
+
+    # write out the header cuz it's a nice thing to do
+    if not opts.no_header :
+        out_f.write(summary)
+        out_f.write('\t'.join(MACSOutput.FIELD_NAMES)+'\n')
+
+    # write out records
+    if opts.shuffle :
+        shuffle(pass_recs)
+    out_recs = pass_recs[:num_recs]
+
+    for rec in out_recs :
+        out_f.write('\t'.join(map(str,rec))+'\n')
+
+    # good programming practice
+    out_f.close()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/filter_mapped_known_genes.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+
+import re
+import sys
+
+from csv import reader, writer
+from collections import defaultdict as dd
+from optparse import OptionParser
+
+from chipsequtil.util import MultiLineHelpFormatter as MF
+
+usage = '%prog [options] <mapped known genes file>'
+description = """Filter columns and rows from *join_mapped_known_genes.py* output which was \
+invoked with *--binary-plus* and *--field-types* flags.  Specify full column names for either \
+binding or expression data with the *--bind-cols* and *--affy-cols* arguments, respectively. \
+The special fieldname *MAPPED* from *join_mapped_known_genes.py* is used to determine whether \
+a file contains a mapping for each gene.  To filter genes by their associated binding or \
+expression data, specify *--bind-filter* or *--affy-filter* as follows:
+
+  - *any* - report gene if at least one input file maps to the gene
+  - *all* - report gene if every input file maps to the gene
+  - *absent* - report gene if no input file maps to the gene
+  - *none* - do not filter genes at all (default)
+
+Results of binding and expression filters are 'and'ed together, e.g.:
+
+--bind-filter=all --affy-filter=absent
+
+returns only genes for which all binding files and none of the expression files map.
+"""
+epilog='Note: when specifying column names, be sure to escape characters like (,),&,*,etc... \
+that shells interpret with a \\, e.g. --bind-cols=-10\\*log10\\(pvalue\\)'
+parser = OptionParser(usage=usage,description=description,epilog=epilog, formatter=MF())
+parser.add_option('--bind-cols',dest='bind_cols',default='',help='comma delimited list of binding data column names to include, [default: all]')
+parser.add_option('--affy-cols',dest='affy_cols',default='',help='comma delimited list of expression data column names to include, [default: all]')
+parser.add_option('--bind-filter',dest='bind_filt',type='choice',choices=['any','all','absent','none'],default='none',help='gene set to include based on binding data [default: %default]')
+parser.add_option('--affy-filter',dest='affy_filt',type='choice',choices=['any','all','absent','none'],default='none',help='gene set to include based on expression data [default: %default]')
+parser.add_option('--output',dest='output',default=None,help='write output to file')
+
+
+def match_headers(patts,field) :
+    for p in patts :
+        if field.endswith(p) : return True
+    return False
+
+def filter_vector(type,vec) :
+    if type == 'any' :
+        return '1' in vec
+    elif type == 'all' :
+        return all([x=='1' for x in vec])
+    elif type == 'absent' :
+        return not ('1' in vec)
+    else :
+        return True
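+# e.g. filter_vector('all',['1','1']) -> True, filter_vector('absent',['0','1'])
+# -> False, and 'none' accepts every gene regardless of its MAPPED vector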
+
+if __name__ == '__main__' :
+
+    opts, args = parser.parse_args(sys.argv[1:])
+
+    if len(args) != 1 :
+        parser.error('Exactly one mapped file must be provided')
+
+    map_fn = args[0]
+
+    map_reader = reader(open(map_fn),delimiter='\t')
+    headers = map_reader.next()
+    bind_headers = [i for i,x in enumerate(headers) if x.startswith('BIND:')]
+    bind_map_headers = [i for i,x in enumerate(headers) if x.startswith('BIND:') and x.endswith('MAPPED')]
+    affy_headers = [i for i,x in enumerate(headers) if x.startswith('AFFY:')]
+    affy_map_headers = [i for i,x in enumerate(headers) if x.startswith('AFFY:') and x.endswith('MAPPED')]
+
+    if len(bind_headers) == 0 and len(affy_headers) == 0 :
+        parser.error('No BIND: or AFFY: columns were found in the mapping, was *join_mapped_known_genes.py* run with the *--field-types* option?')
+
+    # figure out which columns user wants
+    header_indices = [0,1] # always output knowngene and symbol
+
+    bind_header_patts = opts.bind_cols.split(',')
+    header_indices += [i for i in bind_headers if match_headers(bind_header_patts,headers[i])]
+
+    affy_header_patts = opts.affy_cols.split(',')
+    header_indices += [i for i in affy_headers if match_headers(affy_header_patts,headers[i])]
+
+    out_f = open(opts.output,'w') if opts.output else sys.stdout
+    map_writer = writer(out_f,delimiter='\t')
+
+    map_writer.writerow([headers[i] for i in header_indices])
+    for rec in map_reader :
+        bind_vector = [rec[i] for i in bind_map_headers]
+        bind_pass = filter_vector(opts.bind_filt,bind_vector)
+
+        affy_vector = [rec[i] for i in affy_map_headers]
+        affy_pass = filter_vector(opts.affy_filt,affy_vector)
+
+        if bind_pass and affy_pass :
+            map_writer.writerow([rec[i] for i in header_indices])
+
+    if opts.output : out_f.close()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/generate_stats_doc.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,25 @@
+#!/usr/bin/env python
+
+from matplotlib.pyplot import *
+
+from reStUtil import *
+
+if __name__ == '__main__' :
+
+    # read stats
+    # - common read sequences
+    # - overall quality scores
+
+
+    # alignment stats
+    # - # alignments
+    # - uniquely aligned
+    # - multi reads
+    # - fail filter
+    # - alignments per chromosome bar chart
+
+
+    # peak stats
+
+
+    # motif stats and plots
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/gerald_stats.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,113 @@
+#!/usr/bin/env python
+
+import sys, re, os
+from datetime import datetime
+from optparse import OptionParser
+from collections import defaultdict as dd
+#from progressbar import ProgressBar
+from csv import reader, writer
+
+from chipsequtil import get_file_parts
+from chipsequtil.util import MultiLineHelpFormatter as MF
+from reStUtil import ReStDocument, ReStSimpleTable
+
+usage = "%prog [options] <filename> [<filename>...]"
+description="""\
+Outputs various stats about the GERALD formatted file(s) input. If multiple
+files are provided statistics are aggregated according to the specified output
+format.  Output formats available via --format=X :
+
+  # *python* - print an eval()'able python dictionary w/ counts
+  # *rst* - print statistics in a reStructured text table (default)
+  # *tab* - print statistics in a tab delimited form w/ header names
+
+Except for *python* format, each input file has its own output line.  *python*
+summarizes all alignments.
+"""
+
+parser = OptionParser(usage=usage,description=description,formatter=MF())
+parser.add_option('--output',dest='output',default=None,help='write output to file [default: stdout]')
+parser.add_option('--format',dest='format',type='choice',choices=['python','rst','tab'],default='rst',help='format to print out stats [default: %default]')
+
+def log(st) :
+    print datetime.now().isoformat()+' - '+st
+
+re_digits_nondigits = re.compile(r'\d+|\D+')
+def format_with_commas(value,format='%s'):
+    parts = re_digits_nondigits.findall(format % (value,))
+    for i in xrange(len(parts)):
+        s = parts[i]
+        if s.isdigit():
+            parts[i] = _commafy(s)
+            break
+    return ''.join(parts)
+
+def _commafy(s):
+
+    r = []
+    for i, c in enumerate(reversed(s)):
+        if i and (not (i % 3)):
+            r.insert(0, ',')
+        r.insert(0, c)
+    return ''.join(r)
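+# e.g. format_with_commas(1234567) returns '1,234,567'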
+
+if __name__ == '__main__' :
+
+    opts,args = parser.parse_args(sys.argv[1:])
+
+    gerald_fns = args
+
+    all_stats = dd(int)
+    stat_dicts = {}
+    stats_fields = ["sample",
+                    "total alignments",
+                    "% align unique",
+                    "# reads aligned unique",
+                    "% align repeat",
+                    "# reads align repeat",
+                    "% align none",
+                    "# reads align none"
+                   ]
+
+
+    data_rows = []
+    for gerald_fn in gerald_fns :
+        stats = stat_dicts[gerald_fn] = dd(int)
+
+        fnpath,fn,fnbase,fnext = get_file_parts(gerald_fn)
+        gerald_lines = reader(open(gerald_fn),delimiter='\t')
+        for row in gerald_lines :
+            m = re.match('^(\d+):(\d+):(\d+)$',row[10])
+            if m is not None :
+                stats['multiread'] += 1
+                all_stats['multiread'] += 1
+            else :
+                stats[row[10]] += 1
+                all_stats[row[10]] += 1
+
+        # exclude QC (quality-control failed) reads from the total; dividing
+        # by 1. forces float division in the percentage calculations below
+        tot_reads = sum(stats.values())/1.-stats.get('QC',0)
+        unique_reads = sum([v for k,v in stats.items() if k.startswith('chr')])
+        repeat_reads = stats.get('multiread',0)
+        nomap_reads = stats.get('NM',0)
+        data_row = [fn,format_with_commas(int(tot_reads)),
+                    '%.1f'%(unique_reads/tot_reads*100),format_with_commas(unique_reads),
+                    '%.1f'%(repeat_reads/tot_reads*100),format_with_commas(repeat_reads),
+                    '%.1f'%(nomap_reads/tot_reads*100),format_with_commas(nomap_reads)]
+
+        data_rows.append(data_row)
+
+    out_f = open(opts.output,'w') if opts.output is not None else sys.stdout
+
+    if opts.format == 'python' :
+        out_f.write(repr(dict(all_stats))+'\n')
+    elif opts.format == 'rst' :
+        doc = ReStDocument(out_f)
+        table = ReStSimpleTable(header=stats_fields,data=data_rows)
+        doc.add(table)
+        doc.write()
+    elif opts.format == 'tab' :
+        out_w = writer(out_f,delimiter='\t')
+        out_w.writerow(stats_fields)
+        out_w.writerows(data_rows)
+
+    if opts.output is not None : out_f.close()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/gerald_to_bed.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+
+import os
+import re
+import sys
+
+from optparse import OptionParser
+from csv import DictReader, DictWriter
+from chipsequtil import get_file_parts, GERALDOutput
+
+usage = "%prog [options] <GERALD file> [<GERALD file>...]"
+
+description = """\
+Convert the GERALD alignment formatted files into BED format.  Input file named
+<path>/<filename>.<ext> is translated into <path>/<filename>.bed unless --output
+or --stdout is specified, in which case formatted lines are written to file or
+standard output, respectively.  If multiple input files are supplied with the
+--output or --stdout option all formatted lines are concatenated together.
+Formatting only occurs for GERALD input lines that have a valid Match Position
+field (i.e. successfully aligned somewhere)."""
+
+parser = OptionParser(usage=usage, description=description)
+parser.add_option('--output',dest='output',default=None,help='write all records to file')
+parser.add_option('--stdout',dest='stdout',action='store_true',help='write out all formatted lines to stdout')
+parser.add_option('--min-fields',dest='min_fields',action='store_true',help='only format the first three fields')
+parser.add_option('--pass-only',dest='pass_only',action='store_true',help='only format lines with Y in the Pass Filtering field')
+parser.add_option('--chromo-strip',dest='chromo_strip',default='.fa',help='pattern to remove from chromo field in BED output (e.g. --chromo-strip=.fa to remove .fa from chrX.fa) [default: %default]')
+
+
+
+if __name__ == '__main__' :
+
+    opts,args = parser.parse_args(sys.argv[1:])
+
+    if len(args) == 0 :
+        parser.print_usage()
+        sys.exit(1)
+
+    gerald_fns = args
+
+    # step through the files
+    for gerald_fn in gerald_fns :
+        path,fn,fnbase,fnext = get_file_parts(gerald_fn)
+        bed_lines = []
+
+
+        # where to write output to
+        if opts.stdout :
+            f_out = sys.stdout
+        else :
+            f_out = open(os.path.join(path,fnbase+'.bed'),'w')
+
+        # process input
+        gerald_d = DictReader(open(gerald_fn),fieldnames=GERALDOutput.FIELD_NAMES,delimiter='\t')
+        for line_d in gerald_d :
+            if (opts.pass_only and line_d['filtering'] == 'Y' and line_d['match_pos'] != '') or (not opts.pass_only and line_d['match_pos'] != '') :
+
+                if opts.chromo_strip is not None :
+                    line_d['match_chromo'] = line_d['match_chromo'].replace(opts.chromo_strip,'')
+
+                outline = [line_d['match_chromo'], # chromosome
+                           line_d['match_pos'], # start
+                           str(int(line_d['match_pos'])+len(line_d['read'])), # end
+                           line_d['read'], # read
+                           '0', # score
+                           '+' if line_d['match_strand'] == 'F' else '-', # strand
+                           '-', # thickStart
+                           '-', # thickEnd
+                           '0,0,255' if line_d['match_strand'] == 'F' else '255,0,0', # itemRgb 
+                          ]
+                outline = '\t'.join(outline)
+                f_out.write(outline+'\n')
+                #bed_lines.append(bed)
+
+        # this is the slow way
+        #for line in open(gerld_fn) :
+        #    grld = GERALDOutput(line)
+        #    if (opts.pass_only and grld.filtering == 'Y' and grld.match_pos != '') or (not opts.pass_only and grld.match_pos != '') :
+        #        bed = gerald_to_bed(grld,opts.min_fields)
+        #        f_out.write(bed.output_format())
+        #        #bed_lines.append(bed)
+
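+# example (hypothetical record): a passing GERALD line aligned to chr1.fa at
+# position 1000 on the forward strand with a 25 base read becomes the BED line
+#   chr1  1000  1025  <read>  0  +  -  -  0,0,255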
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/integrate_macs_ucsc.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,112 @@
+#!/usr/bin/env python
+
+import os
+import sys
+from optparse import OptionParser
+from pypeline import Pypeline, ProcessPypeStep as PPS, PythonPypeStep as PyPS
+
+from chipsequtil import get_org_settings
+
+usage = "%prog <org> <stage dir> <stage url> <MACS wiggle directory>"
+description = """Process a MACS wiggle directory when macs is invoked
+with --wig option, convert all gzipped chromosome wiggle files to
+bigWig format, copy to web staging directory <stage dir>, and create
+track lines for adding to UCSC genome browser.  Requires a <org> argument
+that has a path using *org_settings.py <org> ucsc_chrom_sizes* that
+points to a sizes file as created by UCSC's *fetchChromSizes <org>*
+tool."""
+
+parser = OptionParser(usage=usage,description=description)
+parser.add_option('--auto',dest='auto',action='store_true',help='run all steps non-interactively (for batch mode, e.g.)')
+
+if __name__ == '__main__' :
+
+    opts, args = parser.parse_args(sys.argv[1:])
+
+    if len(args) != 4 :
+        parser.error('Exactly four non-option arguments required')
+
+    organism, stage_dir, stage_url, macs_dir = args
+
+    pipeline = Pypeline('UCSC Integration',log='ucsc_integ.log')
+
+    steps = []
+
+    org_settings = get_org_settings(organism)
+
+    macs_path, macs_wiggle_path = os.path.dirname(macs_dir), os.path.basename(macs_dir)
+    macs_name = macs_wiggle_path.replace('_MACS_wiggle','')
+    wiggle_dir = macs_name+'_MACS_wiggle'
+    bigwig_fn = macs_name+'_%s_all_chr.bw'
+    d = {'wiggle_dir':macs_name+'_MACS_wiggle',
+         'chrom_sizes':org_settings['ucsc_chrom_sizes'],
+         'treat_bigwig_fn':macs_name+'_treat_all_chr.bw',
+         'control_bigwig_fn':macs_name+'_control_all_chr.bw',
+         'stage_dir':stage_dir,
+         'stage_url':stage_url,
+         'pwd':os.getcwd(),
+        }
+
+    # create bigWig files
+    zcat_treat_call = "zcat %(wiggle_dir)s/treat/*.gz | " + \
+                       "grep -v '^track' | " + \
+                       "sed 's/\.fa//g' | " + \
+                       "wigToBigWig -clip stdin %(chrom_sizes)s " + \
+                       "%(wiggle_dir)s/treat/%(treat_bigwig_fn)s"
+    zcat_control_call = "zcat %(wiggle_dir)s/control/*.gz | " + \
+                        "grep -v '^track' | " + \
+                        "sed 's/\.fa//g' | " + \
+                         "wigToBigWig -clip stdin %(chrom_sizes)s " + \
+                         "%(wiggle_dir)s/control/%(control_bigwig_fn)s"
+    steps.append(PPS('Convert wig to bigWig',[zcat_treat_call%d,zcat_control_call%d]))
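+    # with a hypothetical macs_name of 'exp1' the treatment call renders as:
+    #   zcat exp1_MACS_wiggle/treat/*.gz | grep -v '^track' | sed 's/\.fa//g' |
+    #     wigToBigWig -clip stdin <ucsc_chrom_sizes> exp1_MACS_wiggle/treat/exp1_treat_all_chr.bw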
+
+    # create the staging directory
+    mk_stage_dir_call = "mkdir -p %(stage_dir)s/%(wiggle_dir)s"%d
+    steps.append(PPS('Create staging directory',[mk_stage_dir_call]))
+
+    # stage bigWig files to staging directory (create links)
+    stage_treat_call = "ln -fs %(pwd)s/%(wiggle_dir)s/treat/%(treat_bigwig_fn)s " + \
+                       "%(stage_dir)s/%(wiggle_dir)s/%(treat_bigwig_fn)s"
+    stage_control_call = "ln -fs %(pwd)s/%(wiggle_dir)s/control/%(control_bigwig_fn)s " + \
+                       "%(stage_dir)s/%(wiggle_dir)s/%(control_bigwig_fn)s"
+    steps.append(PPS('Stage bigWig files',[stage_treat_call%d,stage_control_call%d]))
+
+    # generate track lines for treatment and control
+    treat_track_d = ['track',
+               'type=bigWig',
+               'name="Treatment"',
+               'description="%s Treatment"'%macs_name,
+               'bigDataUrl=%(stage_url)s/%(wiggle_dir)s/%(treat_bigwig_fn)s'%d]
+    treat_track = ' '.join(treat_track_d)
+
+    control_track_d = ['track',
+               'type=bigWig',
+               'name="Control"',
+               'description="%s Control"'%macs_name,
+               'bigDataUrl=%(stage_url)s/%(wiggle_dir)s/%(control_bigwig_fn)s'%d]
+    control_track = ' '.join(control_track_d)
+    track_str = '\n'.join([treat_track,
+                          control_track])
+
+    track_fn = wiggle_dir+'_tracks.txt'
+    def track_call(track_fn, track_str) :
+        f = open(track_fn,'w')
+        f.write(track_str+'\n')
+        f.close()
+    steps.append(PyPS('Generate track lines file',track_call,
+                      callable_args=(track_fn,track_str))
+                )
+
+    #calls = [zcat_treat_call,
+    #         zcat_control_call,
+    #         mk_stage_dir_call,
+    #         stage_treat_call,
+    #         stage_control_call,
+    #         track_call
+    #         ]
+
+    #print calls
+    #steps.append(PPS('Stage Wiggle',calls))
+
+    pipeline.add_steps(steps)
+    pipeline.run(interactive=not opts.auto)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/join_mapped_known_genes.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,154 @@
+#!/usr/bin/env python
+
+import sys
+import warnings
+
+from csv import reader, writer
+from collections import defaultdict as dd
+from optparse import OptionParser
+
+usage = '%prog -b <mapped DNA binding file>|-a <mapped microarray file> [-b <mapped DNA binding file> ...] [-a <mapped microarray file> ...]'
+description = """Join all files on the first column, concatenating records with \
+matching entries onto one line per entry.  Understands DNA binding data as mapped \
+with *map_peaks_to_known_genes.py* utility microarray data as mapped by \
+*probeset_to_known_genes.py* utility, passed to program using *-b* and *-a* options \
+respectively.  If a file contains more than one mapping to a gene additional columns \
+are added. At least one file of either type is required.  Field names are written as \
+<filename>.<original field name>.<map number>
+"""
+epilog="Note: microarray files should have been created by bioconductor, and all files should have a row of fieldnames as the first line"
+parser = OptionParser(usage=usage,description=description,epilog=epilog)
+parser.add_option('-a','--affy-file',dest='affy_file',action='append',default=[],help='add a mapped microarray file')
+parser.add_option('-b','--bind-file',dest='bind_file',action='append',default=[],help='add a mapped DNA binding file (e.g. MACS, BED)')
+#parser.add_option('-b','--bed-file',dest='bed_file',action='append',default=[],help='add a mapped BED formatted peaks file')
+parser.add_option('-m','--macs-file',dest='macs_file',action='append',default=[],help='DEPRECATED: use -b instead, add a mapped default MACS formatted peaks (*.xls) file')
+parser.add_option('--output',dest='output',default=None,help='file to output joined records to [default: stdout]')
+#parser.add_option('--intersect',dest='intersect',action='store_true',help='only output records common to all file passed in')
+parser.add_option('--first-only',dest='first_only',action='store_true',help='only output the first mapping to a gene from each file')
+parser.add_option('--binary',dest='binary',action='store_true',help='output only one column per file with a 0 or 1 to indicate whether a mapping exists in that file')
+parser.add_option('--binary-plus',dest='binary_plus',action='store_true',help='output one column per file with a 0 or 1 to indicate whether a mapping exists in that file in addition to all other columns')
+parser.add_option('--field-types',dest='field_types',action='store_true',help='prepend BIND or AFFY to the beginning of all appropriate columns')
+#parser.add_option('--symbols',dest='symbols',action='store_true',help='mapped files contain symbols in second column (per map_peaks_to_known_genes.py|probeset_to_known_gene.py --symbol-xref option)')
+
+if __name__ == '__main__' :
+
+    opts,args = parser.parse_args(sys.argv[1:])
+
+    if len(args) > 0 :
+        parser.error('There were non-option command line arguments passed, all files should have a preceding option indicating filetype')
+
+    if len(opts.macs_file) != 0 :
+        warnings.warn('The -m option is deprecated, please replace these flags with -b instead.  Adding MACS filenames to binding filename list.',DeprecationWarning)
+        opts.bind_file.extend(opts.macs_file)
+
+    if len(opts.bind_file) == 0 and len(opts.affy_file) == 0 :
+        parser.error('No files were passed in, aborting')
+
+    # union of all genes
+    all_genes = set()
+
+    # TODO - fix intersect w/ binary
+    opts.intersect = False
+
+    # TODO - actually make this an option, or the default
+    opts.symbols = True
+    if opts.symbols :
+        symbol_map = {}
+
+    # read all the files in
+    def get_file_dict(fns,header_prefix='') :
+        file_map = dd(lambda: dd(list))
+        out_fieldnames = []
+        blank_entry = []
+        for fn in fns :
+            max_maps = 0
+            f = reader(open(fn),delimiter='\t')
+            #f = open(fn)
+            fieldnames = f.next()
+            fieldnames = fieldnames[2:] # we don't want existing knownGeneID or geneSymbol
+            # read in the data, create a dictionary
+            for l in f :
+                if opts.symbols :
+                    gene, symbol, data = l[0],l[1],l[2:]
+                    symbol_map[gene] = symbol
+                else :
+                    gene, data = l[0], l[1:] # l is already a parsed row (list), not a string
+                file_map[fn][gene].append(data)
+                max_maps = max(max_maps,len(file_map[fn][gene]))
+                all_genes.add(gene)
+
+            # if we're adding a binary column, do it
+            if opts.binary_plus :
+                out_fieldnames.append(header_prefix+fn+'.MAPPED')
+
+            # construct the fieldnames for this file
+            for i in range(max_maps) :
+                out_fieldnames.extend(['%s%s.%d.%s'%(header_prefix,fn,i,h) for h in fieldnames])
+
+            # pad out data entries w/ fewer than max_maps
+            for gene,data in file_map[fn].items() :
+                while len(data) < max_maps :
+                    data.append(['']*len(fieldnames))
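+            # 'blank' is a sentinel record of empty fields, used below to pad
+            # genes that have no mapping in this file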
+            file_map[fn]['blank'] = [['']*len(fieldnames) for _ in range(max_maps)]
+        return file_map,out_fieldnames
+
+    #macs_file_map, macs_fieldnames = get_file_dict(opts.macs_file)
+    #bed_file_map, bed_fieldnames = get_file_dict(opts.bed_file)
+    bind_prefix = 'BIND:' if opts.field_types else ''
+    affy_prefix = 'AFFY:' if opts.field_types else ''
+    bind_file_map, bind_fieldnames = get_file_dict(opts.bind_file,bind_prefix)
+    affy_file_map, affy_fieldnames = get_file_dict(opts.affy_file,affy_prefix)
+
+    # prepare output objects
+    out_f = open(opts.output,'w') if opts.output else sys.stdout
+    map_fieldnames = ['knownGeneID']
+    if opts.symbols :
+        map_fieldnames.append('geneSymbol')
+    #all_fieldnames = map_fieldnames+macs_fieldnames+bed_fieldnames+affy_fieldnames
+    all_fieldnames = map_fieldnames+bind_fieldnames+affy_fieldnames
+    if opts.binary :
+        #all_fieldnames = map_fieldnames+opts.macs_file+opts.bed_file+opts.affy_file
+        all_fieldnames = map_fieldnames+[x+'.MAPPED' for x in opts.bind_file+opts.affy_file] # keep key columns unchanged
+    join_writer = writer(out_f,delimiter='\t')
+    join_writer.writerow(all_fieldnames)
+
+    # go through all the genes and print out lines
+    for gene in all_genes :
+        gene_line = [gene]
+        if opts.symbols :
+            gene_line.append(symbol_map[gene])
+        #for filetype_data,fns in zip([macs_file_map,bed_file_map,affy_file_map],[opts.macs_file,opts.bed_file,opts.affy_file]) :
+        for filetype_data,fns in zip([bind_file_map,affy_file_map],[opts.bind_file,opts.affy_file]) :
+            for fn,recs in [(fn,filetype_data[fn]) for fn in fns] :
+            #for fn,recs in d.items() :
+                if gene in recs :
+                    # only output the first entry
+                    if opts.first_only :
+                        gene_line.extend(recs[gene][0])
+                    # only output a 1 or a zero
+                    elif opts.binary :
+                        gene_line.append('1')
+                    # else output normally
+                    else :
+                        # add binary column in addition to other output
+                        if opts.binary_plus :
+                            gene_line.append('1')
+                        for rec in recs[gene] :
+                            gene_line.extend(rec)
+                else :
+                    # if intersecting, ignore this gene
+                    if opts.intersect :
+                        continue
+                    elif opts.binary :
+                        gene_line.append('0')
+                    else :
+                        # add binary column in addition to other output
+                        if opts.binary_plus :
+                            gene_line.append('0')
+                        for blank in filetype_data[fn]['blank'] :
+                            #print len(blank)
+                            gene_line.extend(blank)
+                #print fn, gene_line[2], len(gene_line), gene_line
+        join_writer.writerow(gene_line)
+
+    if opts.output : out_f.close()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/kg_to_gff.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+
+import os
+import sys
+from csv import DictReader, DictWriter, QUOTE_NONE
+from optparse import OptionParser
+
+from chipsequtil import KnownGeneFile, get_file_parts
+
+usage = '%prog <knownGene annotation> <kgXref file>'
+description = 'Convert a UCSC knownGene annotation to GFF'
+parser = OptionParser(usage=usage,description=description)
+
+
+if __name__ == '__main__' :
+
+    opts, args = parser.parse_args(sys.argv[1:])
+
+    if len(args) != 2 :
+        parser.error('Exactly 2 arguments are required: <knownGene annotation> <kgXref file>')
+
+    kg_path,kg_fn,kg_base,kg_ext = get_file_parts(args[0])
+    #kg_f = KnownGeneFile(args[0])
+
+    # xref for finding gene symbols
+    kgXref_fn = args[1]
+    kgXref_fieldnames = ['kgID','mRNA','spID','spDisplayID','geneSymbol','refseq','protAcc','description']
+    xref_map = dict([(x['kgID'],x) for x in DictReader(open(kgXref_fn),delimiter='\t',fieldnames=kgXref_fieldnames)])
+
+    gff_headers = ['seqname','source','feature','start','end','score','strand','frame','attributes']
+    gff_reader = DictReader(open(args[0]),delimiter='\t',fieldnames=gff_headers)
+    gff_writer = DictWriter(sys.stdout,delimiter='\t',fieldnames=gff_headers,quotechar='',quoting=QUOTE_NONE,lineterminator='\n')
+    #gff_writer.writerow(dict([(x,x) for x in gff_headers]))
+
+    for i,rec in enumerate(gff_reader) :
+        #d = {}
+        #d['seqname'] = rec['chrom']
+        #d['source'] = 'UCSC_knownGene'
+        #d['feature'] = 'gene'
+        #d['start'] = rec['txStart']
+        #d['end'] = rec['txEnd']
+        #d['score'] = '.'
+        #d['strand'] = rec['strand']
+        #d['frame'] = '.'
+        #gene_name = rec['name']
+
+        gff_attrs_lst = [x.strip() for x in rec['attributes'].split(';')][:-1]
+        gff_attrs = {}
+        for attr in gff_attrs_lst :
+            k,v = attr.split(' ',1)
+            gff_attrs[k] = v.strip('"') # attribute values are quoted strings; avoid eval
+
+        kg_name = gff_attrs['gene_id']
+
+        # try to find a gene symbol
+        gene_id = xref_map[kg_name].get('geneSymbol',None)
+        #gene_id = kg_name
+        #if gene_id is None :
+        #    gene_id = xref_map[kg_name].get('mRNA',None)
+        #if gene_id is None :
+        #    gene_id = xref_map[kg_name].get('refseq',None)
+        if gene_id is None : # I give up
+            gene_id = kg_name
+
+        gff_attrs_lst += ['gene_name "%s"'%gene_id]
+        rec['attributes'] = '; '.join(gff_attrs_lst)
+        gff_writer.writerow(rec)
+
+        # now write the exons
+        #d['feature'] = 'exon'
+        #for j,(st,en) in enumerate(zip(rec['exonStarts'],rec['exonEnds'])) :
+        #    d['start'] = st
+        #    d['end'] = en
+        #    d['attributes'] = '; '.join(['gene_id "%s"'%gene_id,'transcript_id "%s"'%rec['name'],'exon_number "%d"'%(j+1),'ID "%s.exon_%d"'%(rec['name'],j),'PARENT "%s"'%rec['name']])
+        #    gff_writer.writerow(d)
+
+
+    # version with knownGene in gene_name
+    # version with symbol in gene_name
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/map_intervals.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,126 @@
+#!/usr/bin/env python
+
+import sys
+
+from collections import defaultdict
+from csv import reader
+from optparse import OptionParser
+
+from bx.intervals.intersection import IntervalTree, Interval
+
+usage = '%prog [options] <from> <to>'
+description = """Find records in <to> interval file that map to records in
+<from> interval file.  Files should be tab delimited and are expected to have
+a chromosome column, a start column, and an end column.  The indices of these
+columns can be specified on the command line but by default are the first
+three columns, respectively.  Prints out to stdout by default one new line
+separated row per row in <from> with a line from <to> where there is a mapping.
+If no mapping is found (e.g. when specifying a maximum margin to search within)
+the word None is printed.  By default only prints nearest record, with ties
+settled by smallest line number in <to>."""
+parser = OptionParser(usage=usage,description=description)
+parser.add_option('-w','--window',dest='window',type="float",nargs=2,
+                  default=(1e9,1e9),
+                  help="window as <int upstream> <int downstream> to search for intervals [default: %default]")
+parser.add_option('-f','--from',dest='from_ind',type="int",nargs=3,
+                  default=(0,1,2),
+                  help="coordinates of chromosome, start, stop in <from> file")
+parser.add_option('-i','--skip-from-header',dest='skip_fh',action='store_true',
+                  help="<from> has a header that should be skipped")
+parser.add_option('-t','--to',dest='to_ind',type="int",nargs=3,
+                  default=(0,1,2),
+                  help="coordinates of chromosome, start, stop in <to> file")
+parser.add_option('-j','--skip-to-header',dest='skip_th',action='store_true',
+                  help="<to> has a header that should be skipped")
+
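+# Example (hypothetical files): report for each record in peaks.bed the nearest
+# record in genes.bed within 10kb up- or downstream:
+#   map_intervals.py -w 10000 10000 peaks.bed genes.bed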
+if __name__ == '__main__' :
+
+    opts, args = parser.parse_args(sys.argv[1:])
+
+    if len(args) != 2 :
+        parser.error('Exactly 2 non-option arguments are required')
+
+    from_fn, to_fn = args
+
+    chr_trees = defaultdict(IntervalTree)
+    chr_sizes = defaultdict(lambda : dict(minstart=sys.maxint,maxend=0))
+
+    if any([x > 1e9 for x in opts.window]) :
+        parser.error('Window maximum is +/- 1e9')
+
+    to_reader = reader(open(to_fn),delimiter='\t')
+    if opts.skip_th :
+        to_header = to_reader.next()
+
+    to_chr, to_st, to_en = opts.to_ind
+    for r in to_reader :
+        i = Interval(int(r[to_st]),
+                     int(r[to_en]),
+                     value=r,
+                     chrom=r[to_chr]
+                     )
+        chr_trees[r[to_chr]].insert_interval(i)
+        chr_sizes[r[to_chr]]['minstart'] = min(int(r[to_st]),chr_sizes[r[to_chr]]['minstart'])
+        chr_sizes[r[to_chr]]['maxend'] = max(int(r[to_en]),chr_sizes[r[to_chr]]['maxend']) # use the end column for maxend
+
+    # window default is 1e9 because no chromosome is more than
+    # a billion base pairs, right?!
+    def find_nearest(t,s,e,window=(1e9,1e9)) :
+
+        # look for record within intervals
+        inside = t.find(s,e)
+        
+        if len(inside) >= 1 : # pick the first one, list returned is sorted
+            return inside[0]
+
+        i = Interval(s,e)
+        before = t.upstream_of_interval(i,max_dist=window[0])
+        after = t.downstream_of_interval(i,max_dist=window[1])
+
+        before = before[0] if len(before) != 0 else None
+        after = after[0] if len(after) != 0 else None
+
+        if before and after :
+            b_dist = min(abs(before.end-s),abs(e-before.start))
+            a_dist = min(abs(after.end-s),abs(e-after.start))
+            nearest = before if b_dist < a_dist else after
+        elif before :
+            nearest = before
+        elif after :
+            nearest = after
+        else :
+            nearest = None
+        return nearest
+
+    # now go through the from file
+    from_reader = reader(open(from_fn),delimiter='\t')
+    if opts.skip_fh : from_reader.next()
+
+    from_chr, from_st, from_en = opts.from_ind
+    if opts.skip_th :
+        print '\t'.join(to_header)
+    for r in from_reader :
+        t = find_nearest(chr_trees[r[from_chr]],int(r[from_st]),int(r[from_en]),
+                         window=opts.window)
+        if t :
+            print '\t'.join(t.value)
+        else :
+            print t
+    """
+    # tests
+    print 'interval is before any other interval in tree'
+    t = find_nearest(chr_trees['chr2'],10388500,10388510)
+    print '\tCorrect answer: %s, Returned answer: %s'%('mmu-mir-466f-1',t.value),t
+    print 'interval is after any other interval in tree'
+    t = find_nearest(chr_trees['chr1'],200000000,200000010)
+    print '\tCorrect answer: %s, Returned answer: %s'%('mmu-mir-29c',t.value),t
+    print 'interval is between intervals'
+    t = find_nearest(chr_trees['chr3'],89773941,89774021)
+    print '\tCorrect answer: %s, Returned answer: %s'%('mmu-mir-190b',t.value),t
+    print 'interval is inside another interval'
+    t = find_nearest(chr_trees['chr3'],89873999,89874001)
+    print '\tCorrect answer: %s, Returned answer: %s'%('mmu-mir-190b',t.value), t
+    print 'interval is too far from anything to return anything'
+    t = find_nearest(chr_trees['chr3'],89773941,89774021,window=(10,10))
+    print '\tCorrect answer: None, Returned answer: %s'%t
+    """
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/map_peaks_to_genes.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,202 @@
+#!/usr/bin/env python
+
+import sys, os
+from optparse import OptionParser
+from collections import defaultdict as dd
+from chipsequtil import MACSOutput, BEDOutput, RefGeneOutput, parse_number
+from csv import DictReader, DictWriter
+
+usage = '%prog [options] <refGene file> <peaks file>'
+description = """
+Map the peaks in <peaks file> to genes in <refGene file>.  The <refGene file>
+format is as specified in http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/refGene.sql.
+The <peaks file> format is as produced by MACS."""
+epilog = ''
+parser = OptionParser(usage=usage,description=description,epilog=epilog)
+parser.add_option('--upstream-window',dest='upst_win',type='int',default=5500,help='window width in base pairs to consider promoter region [default: %default]')
+parser.add_option('--downstream-window',dest='dnst_win',type='int',default=2500,help='window width in base pairs to consider downstream region [default: %default]')
+parser.add_option('--map-output',dest='peak_output',default=sys.stdout,help='filename to output mapped peaks in BED format to [default: stdout]')
+parser.add_option('--stats-output',dest='stats_output',default=sys.stderr,help='filename to output summary stats in conversion [default: stderr]')
+parser.add_option('--peaks-format',dest='peaks_fmt',default='MACS',type='choice',choices=['MACS','BED'],help='format of peaks input file [default: %default]')
+
+# TODO - options
+#parser.add_option('--use-cds',dest='use_cds',action='store_true',help='use cdsStart and cdsEnd fields instead of txStart and txEnd to do mapping')
+#parser.add_option('--capture-intergenic'...)
+#parser.add_option('--map-format',dest='peak_format',type='choice',choices=['default','BED'],help='format of peak output [default: %default]')
+#parser.add_option('--stats-format',dest='stats_format',type='choice',choices=['human','python'],help='format of summary stats output [default: %default]')
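+# Example invocation (hypothetical filenames):
+#   map_peaks_to_genes.py refGene.txt peaks.xls > mapped_peaks.txt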
+
+def parse_gene_ref(ref_gene) :
+    #FIXME - maybe, if galaxy doesn't work out, figure out how to deal with multiple RefGene mapping formats?
+    fieldnames = ['geneName','name','chrom','strand','txStart','txEnd','cdsStart','cdsEnd','exonCount','exonStarts','exonEnds']
+    reader = DictReader(ref_gene,fieldnames=fieldnames,delimiter='\t')
+    gene_ref = dd(list)
+    for ref_dict in reader :
+        for k,v in ref_dict.items() :
+            # coerce numbers where possible
+            ref_dict[k] = parse_number(v)
+
+        # turn 'x,x,x,...' into a list
+        ref_dict['exonStarts'] = [parse_number(x) for x in ref_dict['exonStarts'].split(',')]
+        if ref_dict['exonStarts'][-1] == '' : ref_dict['exonStarts'].remove('')
+        ref_dict['exonEnds'] = [parse_number(x) for x in ref_dict['exonEnds'].split(',')]
+        if ref_dict['exonEnds'][-1] == '' : ref_dict['exonEnds'].remove('')
+
+        gene_ref[ref_dict['chrom']].append(ref_dict)
+
+    return gene_ref
+
+
+if __name__ == '__main__' :
+
+    opts, args = parser.parse_args(sys.argv[1:])
+
+    if len(args) < 2 :
+        parser.error('Must provide two filename arguments')
+
+    gene_ref = parse_gene_ref(open(args[0]))
+    if opts.peaks_fmt == 'MACS' :
+        fieldnames = MACSOutput.FIELD_NAMES
+        chr_field, start_field, end_field = 'chr', 'start', 'end'
+    elif opts.peaks_fmt == 'BED' :
+        fieldnames = BEDOutput.FIELD_NAMES
+        chr_field, start_field, end_field = 'chrom', 'chromStart', 'chromEnd'
+    else :
+        fieldnames = []
+
+    peaks_reader = DictReader(open(args[1]),fieldnames=fieldnames,delimiter='\t')
+
+    # default output format:
+    # <chromo> <peak loc> <accession #> <gene symbol> <strand> <map type> <map subtype> <score> <dist from feature>
+    # score = (peak-TSS)/(TSE-TSS) - peak distance from TSS as fraction of length of gene
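+    # e.g. (illustrative) a peak 500 bp into a 2000 bp gene on the + strand
+    # scores 500/2000 = 0.25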
+    output_fields = ['chromo',
+                     'peak loc',
+                     'accession #',
+                     'gene symbol',
+                     'strand',
+                     'map type',
+                     'map subtype',
+                     'score',
+                     'dist from feature',
+    ]
+    if opts.peak_output != sys.stdout :
+        opts.peak_output = open(opts.peak_output,'w')
+    peaks_writer = DictWriter(opts.peak_output,output_fields,delimiter='\t',lineterminator='\n')
+    unique_genes = set()
+    map_stats = dd(int)
+    for peak in peaks_reader :
+
+        # if this is a comment or header line, skip it
+        if peak[fieldnames[0]].startswith('#') or \
+           peak[fieldnames[0]] == fieldnames[0] or \
+           peak[fieldnames[0]].startswith('track') : continue
+
+        # coerce values to numeric if possible
+        for k,v in peak.items() : peak[k] = parse_number(v)
+
+        # peak assumed to be in the middle of the reported peak range
+        peak_loc = (peak[start_field]+peak[end_field])/2
+
+        chrom_genes = gene_ref[peak[chr_field]]
+
+        if len(chrom_genes) == 0 :
+            sys.stderr.write('WARNING: peak chromosome %s not found in gene reference, skipping: %s\n'%(peak[chr_field],peak))
+            continue
+
+        mapped = False
+
+        # walk through the genes for this chromosome
+        for gene in chrom_genes :
+
+            # reusable dictionary for output
+            out_d = {}.fromkeys(output_fields,0)
+            out_d['map type'] = ''
+            out_d['chromo'] = peak[chr_field]
+            out_d['peak loc'] = peak_loc
+
+            # determine intervals for promoter, gene, and downstream
+            if gene['strand'] == '+' :
+                promoter_coords = max(gene['txStart']-1-opts.upst_win,0), gene['txStart']-1
+                gene_coords = gene['txStart'], gene['txEnd']
+                downstream_coords = gene['txEnd']+1, gene['txEnd']+1+opts.dnst_win
+            else :
+                promoter_coords = gene['txEnd']+1, gene['txEnd']+1+opts.upst_win # +1 because we're using 1 based indexing
+                gene_coords = gene['txStart'], gene['txEnd']
+                downstream_coords = gene['txStart']-1-opts.dnst_win, gene['txStart']-1 # -1 because we're using 1 based indexing
+
+            # check for promoter
+            if peak_loc >= promoter_coords[0] and peak_loc <= promoter_coords[1] :
+                out_d['map type'] = 'promoter'
+                out_d['dist from feature'] = peak_loc - promoter_coords[1] if gene['strand'] == '+' else promoter_coords[0] - peak_loc
+
+            # check for gene
+            elif peak_loc >= gene_coords[0] and peak_loc <= gene_coords[1] :
+                # check for intron/exon
+                exon_coords = zip(gene['exonStarts'],gene['exonEnds'])
+                in_exon = False
+                for st,en in exon_coords :
+                    if peak_loc >= st and peak_loc <= en :
+                        in_exon = True
+                        break
+                out_d['map type'] = 'gene'
+                out_d['map subtype'] = 'exon' if in_exon else 'intron'
+
+                # score = (peak-TSS)/(TSE-TSS) - peak distance from TSS as fraction of length of gene
+                gene_len = float(gene_coords[1]-gene_coords[0])
+                out_d['score'] = (peak_loc-gene_coords[0])/gene_len if gene['strand'] == '+' else (gene_coords[1]-peak_loc)/gene_len
+
+                # distance calculated from start of gene
+                out_d['dist from feature'] = peak_loc - promoter_coords[1] if gene['strand'] == '+' else promoter_coords[0] - peak_loc
+
+                map_stats[out_d['map subtype']] += 1
+
+            # check for downstream
+            elif peak_loc >= downstream_coords[0] and peak_loc <= downstream_coords[1] :
+                out_d['map type'] = 'after'
+                out_d['dist from feature'] = peak_loc - downstream_coords[0] if gene['strand'] == '+' else downstream_coords[1] - peak_loc
+
+            # does not map to this gene
+            else :
+                pass
+
+            # map type is not blank if we mapped to something
+            if out_d['map type'] != '' :
+
+                out_d['accession #'] = gene['name']
+                out_d['gene symbol'] = gene['geneName']
+                out_d['strand'] = gene['strand']
+
+                map_stats[out_d['map type']] += 1
+                peaks_writer.writerow(out_d)
+
+                unique_genes.add(gene['name'])
+                mapped = True
+
+                """
+                print 'Peak:',peak
+                print 'Gene:',gene
+                print 'Peak loc:',peak_loc
+                print promoter_coords
+                print gene_coords
+                print downstream_coords
+                raw_input('Wait for it...')
+                """
+
+                # reset map_type
+                out_d['map type'] = ''
+
+        if not mapped :
+            #out_d['map type'] = 'intergenic'
+            #peaks_writer.writerow(out_d)
+            map_stats['intergenic'] += 1
+
+    if opts.peak_output != sys.stdout :
+        opts.peak_output.close()
+
+    if opts.stats_output != sys.stderr :
+        opts.stats_output = open(opts.stats_output,'w')
+
+    for k,v in map_stats.items() :
+        opts.stats_output.write('%s: %s\n'%(k,v))
+
+    if opts.stats_output != sys.stderr :
+        opts.stats_output.close()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/map_peaks_to_known_genes.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,233 @@
+#!/usr/bin/env python
+
+import sys, os
+from optparse import OptionParser
+from collections import defaultdict as dd
+from csv import DictReader, DictWriter
+
+from chipsequtil import MACSFile, BEDFile, KnownGeneFile, parse_number
+from chipsequtil.util import MultiLineHelpFormatter
+
+usage = '%prog [options] <knownGene file> <knownGene xRef file> <peaks file>'
+description = """
+Map the peaks in <peaks file> to genes in <knownGene file>.  The <knownGene file> \
+format is as specified in http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/knownGene.sql.  \
+The <peaks file> format is as produced by MACS.  If *auto* is chosen (the default), the \
+file extension is examined: *.xls* for default MACS format, *.bed* for BED format.  If the --detail \
+option is provided, the following extra fields are appended to each row:
+
+peak loc, dist from feature, score, map type, map subtype
+"""
+epilog = ''
+parser = OptionParser(usage=usage,description=description,epilog=epilog,formatter=MultiLineHelpFormatter())
+parser.add_option('--upstream-window',dest='upst_win',type='int',default=5500,help='window width in base pairs to consider promoter region [default: %default]')
+parser.add_option('--downstream-window',dest='dnst_win',type='int',default=2500,help='window width in base pairs to consider downstream region [default: %default]')
+parser.add_option('--tss',dest='tss',action='store_true',help='calculate downstream window from transcription start site instead of transcription end site')
+parser.add_option('--map-output',dest='peak_output',default=None,help='filename to output mapped peaks to [default: stdout]')
+parser.add_option('--stats-output',dest='stats_output',default=sys.stderr,help='filename to output summary stats in conversion [default: stderr]')
+parser.add_option('--peaks-format',dest='peaks_fmt',default='auto',type='choice',choices=['auto','MACS','BED'],help='format of peaks input file [default: %default]')
+parser.add_option('--detail',dest='detail',action='store_true',help='add extra fields to output, see description')
+parser.add_option('--intergenic',dest='intergenic',action='store_true',help='write intergenic peaks to the gene file as well with None as gene ID')
+#parser.add_option('--symbol-xref',dest='symbol_xref',default=None,help='use the kgXref table file supplied to find a gene symbol, output as second column')
+
+# TODO - options
+#parser.add_option('--use-cds',dest='use_cds',action='store_true',help='use cdsStart and cdsEnd fields instead of txStart and txEnd to do mapping')
+#parser.add_option('--capture-intergenic'...)
+#parser.add_option('--map-format',dest='peak_format',type='choice',choices=['default','BED'],help='format of peak output [default: %default]')
+#parser.add_option('--stats-format',dest='stats_format',type='choice',choices=['human','python'],help='format of summary stats output [default: %default]')
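+# Example invocation (hypothetical filenames):
+#   map_peaks_to_known_genes.py knownGene.txt kgXref.txt peaks.xls > mapped_peaks.txt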
+
+def parse_gene_ref(ref_gene) :
+    reader = KnownGeneFile(ref_gene)
+    gene_ref = dd(list)
+    for ref_dict in reader :
+        gene_ref[ref_dict['chrom']].append(ref_dict)
+
+    return gene_ref
+
+def parse_gene_ref_line(l) :
+    l = map(parse_number, l) # coerce to numbers where possible
+    l[9] = map(parse_number, l[9].split(',')) # turn 'x,x,x,...' into list
+    l[10] = map(parse_number, l[10].split(','))
+    return l
+
+if __name__ == '__main__' :
+
+    opts, args = parser.parse_args(sys.argv[1:])
+
+    if len(args) < 3 :
+        parser.error('Must provide three filename arguments')
+
+    gene_ref = parse_gene_ref(args[0])
+    xref_fn = args[1]
+    peaks_fn = args[2]
+    if opts.peaks_fmt == 'auto' :
+        path,ext = os.path.splitext(peaks_fn)
+        if ext.lower() == '.xls' :
+            opts.peaks_fmt = 'MACS'
+        elif ext.lower() == '.bed' :
+            opts.peaks_fmt = 'BED'
+        else :
+            parser.error('Could not guess peaks file format by extension (%s), aborting'%ext)
+
+    if opts.peaks_fmt == 'MACS' :
+        peaks_reader_cls = MACSFile
+        chr_field, start_field, end_field = 'chr', 'start', 'end'
+    elif opts.peaks_fmt == 'BED' :
+        peaks_reader_cls = BEDFile
+        chr_field, start_field, end_field = 'chrom', 'chromStart', 'chromEnd'
+    else :
+        # should never happen
+        fieldnames = []
+
+    #peaks_reader = DictReader(open(args[1]),fieldnames=fieldnames,delimiter='\t')
+    peaks_reader = peaks_reader_cls(peaks_fn)
+
+    # default output format:
+    if opts.peak_output :
+        peak_output = open(opts.peak_output,'w')
+    else :
+        peak_output = sys.stdout
+
+    fieldnames = list(peaks_reader.FIELD_NAMES) # copy so += below doesn't mutate the class attribute
+    if opts.detail :
+        fieldnames += ["peak loc","dist from feature","score","map type","map subtype"]
+    output_fields = ['knownGeneID']+fieldnames
+
+    # see if the user wants gene symbols too
+    # TODO - actually make this an option, or make it required
+    opts.symbol_xref = xref_fn
+    if opts.symbol_xref :
+        kgXref_fieldnames = ['kgID','mRNA','spID','spDisplayID','geneSymbol','refseq','protAcc','description']
+        symbol_xref_reader = DictReader(open(opts.symbol_xref),fieldnames=kgXref_fieldnames,delimiter='\t')
+        symbol_xref_map = {}
+        for rec in symbol_xref_reader :
+            symbol_xref_map[rec['kgID']] = rec
+        output_fields = ['knownGeneID','geneSymbol']+fieldnames
+
+    peaks_writer = DictWriter(peak_output,output_fields,delimiter='\t',extrasaction='ignore',lineterminator='\n')
+    peaks_writer.writerow(dict([(k,k) for k in output_fields]))
+    unique_genes = set()
+    map_stats = dd(int)
+    for peak in peaks_reader :
+
+        # if this is a comment or header line, skip it
+        if peak[fieldnames[0]].startswith('#') or \
+           peak[fieldnames[0]] == fieldnames[0] or \
+           peak[fieldnames[0]].startswith('track') : continue
+
+        # coerce values to numeric if possible
+        for k,v in peak.items() : peak[k] = parse_number(v)
+
+        # MACS output gives us summit
+        if opts.peaks_fmt == 'MACS' :
+            peak_loc = peak[start_field]+peak['summit']
+        else : # peak assumed to be in the middle of the reported peak range
+            peak_loc = (peak[start_field]+peak[end_field])/2
+
+        chrom_genes = gene_ref[peak[chr_field]]
+
+        if len(chrom_genes) == 0 :
+            sys.stderr.write('WARNING: peak chromosome %s not found in gene reference, skipping: %s\n'%(peak[chr_field],peak))
+            continue
+
+        mapped = False
+
+        # walk through the genes for this chromosome
+        for gene in chrom_genes :
+
+            # reusable dictionary for output
+            out_d = {}.fromkeys(output_fields,0)
+            out_d.update(peak)
+            out_d['map type'] = ''
+            out_d['chromo'] = peak[chr_field]
+            out_d['peak loc'] = peak_loc
+
+            # determine intervals for promoter, gene, and downstream
+            if gene['strand'] == '+' :
+                promoter_coords = max(gene['txStart']-1-opts.upst_win,0), gene['txStart']-1
+                if opts.tss :
+                    gene_coords = gene['txStart'], min(gene['txEnd'],gene['txStart']+opts.dnst_win)
+                    downstream_coords = gene['txEnd']+1,gene['txStart']+opts.dnst_win
+                else :
+                    gene_coords = gene['txStart'], gene['txEnd']
+                    downstream_coords = gene['txEnd']+1, gene['txEnd']+1+opts.dnst_win
+            else :
+                promoter_coords = gene['txEnd']+1, gene['txEnd']+1+opts.upst_win # +1 because we're using 1 based indexing
+                if opts.tss :
+                    gene_coords = max(gene['txStart'],gene['txEnd']-opts.dnst_win), gene['txEnd'] # dnst_win, mirroring the + strand case
+                    downstream_coords = gene['txEnd']-1-opts.dnst_win, gene['txStart']-1 # -1 because we're using 1 based indexing
+                else :
+                    gene_coords = gene['txStart'], gene['txEnd']
+                    downstream_coords = gene['txStart']-1-opts.dnst_win, gene['txStart']-1 # -1 because we're using 1 based indexing
+
+            # check for promoter
+            if peak_loc >= promoter_coords[0] and peak_loc <= promoter_coords[1] :
+                out_d['map type'] = 'promoter'
+                out_d['dist from feature'] = peak_loc - promoter_coords[1] if gene['strand'] == '+' else promoter_coords[0] - peak_loc
+
+            # check for gene
+            elif peak_loc >= gene_coords[0] and peak_loc <= gene_coords[1] :
+                # check for intron/exon
+                exon_coords = zip(gene['exonStarts'],gene['exonEnds'])
+                in_exon = False
+                for st,en in exon_coords :
+                    if peak_loc >= st and peak_loc <= en :
+                        in_exon = True
+                        break
+                out_d['map type'] = 'gene'
+                out_d['map subtype'] = 'exon' if in_exon else 'intron'
+
+                # score = (peak-TSS)/(TSE-TSS) - peak distance from TSS as fraction of length of gene
+                gene_len = float(gene_coords[1]-gene_coords[0])
+                out_d['score'] = (peak_loc-gene_coords[0])/gene_len if gene['strand'] == '+' else (gene_coords[1]-peak_loc)/gene_len
+
+                # distance calculated from start of gene
+                out_d['dist from feature'] = peak_loc - promoter_coords[1] if gene['strand'] == '+' else promoter_coords[0] - peak_loc
+
+                map_stats[out_d['map subtype']] += 1
+
+            # check for downstream
+            elif peak_loc >= downstream_coords[0] and peak_loc <= downstream_coords[1] :
+                out_d['map type'] = 'after'
+                if opts.tss :
+                    out_d['dist from feature'] = peak_loc - gene_coords[0] if gene['strand'] == '+' else gene_coords[1] - peak_loc
+                else :
+                    out_d['dist from feature'] = peak_loc - downstream_coords[0] if gene['strand'] == '+' else downstream_coords[1] - peak_loc
+
+            # does not map to this gene
+            else :
+                pass
+
+            # map type is not blank if we mapped to something
+            if out_d['map type'] != '' :
+
+                #out_d = {'knownGeneID':gene['name']}
+                out_d['knownGeneID'] = gene['name']
+                if opts.symbol_xref :
+                    out_d['geneSymbol'] = symbol_xref_map[gene['name']]['geneSymbol']
+                peaks_writer.writerow(out_d)
+
+                mapped = True
+
+                # reset map_type
+                out_d['map type'] = ''
+
+        if not mapped :
+            if opts.intergenic :
+                out_d['knownGeneID'] = 'None'
+                out_d['geneSymbol'] = 'None'
+                out_d['map type'] = 'intergenic'
+                peaks_writer.writerow(out_d)
+            map_stats['intergenic'] += 1
+
+    if peak_output != sys.stdout :
+        peak_output.close()
+
+    #if opts.stats_output != sys.stderr :
+    #    opts.stats_output = open(opts.stats_output,'w')
+
+    #for k,v in map_stats.items() :
+    #    opts.stats_output.write('%s: %s\n'%(k,v))
+
+    #if opts.stats_output != sys.stderr :
+    #    opts.stats_output.close()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/motif_scan.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,330 @@
+#!/usr/bin/env python
+
+import matplotlib
+matplotlib.use('AGG')
+
+import numpy as np
+import os
+import random
+import string
+import sys
+
+from math import log, pow
+import matplotlib.pyplot as mp
+from multiprocessing import Pool
+from optparse import OptionParser 
+from scipy.stats.stats import pearsonr
+
+from chipsequtil import MACSFile, get_org_settings
+from chipsequtil.nib import NibDB
+from chipsequtil.sampling import rejection_sample_bg
+from TAMO import MotifTools as mt
+from TAMO.MotifTools import load
+
+usage = "%prog [options] <org> <peaks fn> <TAMO motif fn>"
+desc = "Do some motif scanning stuffs"
+parser = OptionParser(usage=usage,description=desc)
+
+parser.add_option('-n','--top-n',dest='top_n',type='int',default=None,
+                  help='use top n peaks by pvalue for sequence scanning [default: all]')
+parser.add_option('-i','--motif-indices',dest='motif_ind',default='all',
+                  help='which indices from <TAMO motif fn> to use [default: %default]')
+parser.add_option('-d','--dir',dest='dir',default='motif_results',
+                  help='write all results into this directory')
+parser.add_option('--fixed-peak-width',dest='fixed_w',type='int',default=None,
+                  help='use only a fixed peak window around the summit instead of whole peak')
+
+revcomp_map = string.maketrans('ACGT','TGCA')
+
+def score_sequence(seq,motif) :
+    ll_max = -sys.maxint
+    for i in range(len(seq)-len(motif)+1) : # +1 so the final window is scored
+        # forward strand
+        ll_for_sum = 0
+        subseq = seq[i:i+len(motif)].upper()
+        for n,pos in zip(subseq,motif.ll) :
+            ll_for_sum += pos[n]
+        # reverse strand
+        ll_rev_sum = 0
+        subseq = reversed(subseq.translate(revcomp_map))
+        for n,pos in zip(subseq,motif.ll) :
+            ll_rev_sum += pos[n]
+        ll_max = max(ll_max,ll_for_sum,ll_rev_sum)
+
+    return ll_max
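+
+# NOTE: score_sequence returns an unnormalized best log-likelihood; the scanning
+# loop below uses TAMO's m.bestscan() instead and rescales scores to [0,1] via
+# (score - m.minscore)/(m.maxscore - m.minscore)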
+
+illegal_fn_chars = '/;& ()'
+fn_trans = string.maketrans(illegal_fn_chars,'_'*len(illegal_fn_chars))
+
+def fasta_itr(fn) :
+    f = open(fn)
+    header = None
+    seq = None
+    for l in f :
+        if l.strip().startswith('>') :
+            if seq is not None :
+                yield (header,seq)
+                seq = None
+            header = l.strip()
+        else :
+            seq = seq+l.strip() if seq is not None else l.strip()
+
+    # last record
+    yield (header, seq)
+
+if __name__ == '__main__' :
+
+    opts, args = parser.parse_args(sys.argv[1:])
+
+    if len(args) != 3 :
+        parser.error('Exactly 3 non-option arguments must be provided')
+
+    org, peaks_fn, motif_fn = args
+
+    if not os.path.exists(opts.dir) :
+        os.mkdir(opts.dir)
+
+    peaks_dt = np.dtype([('chr',np.str_,13),('start',np.int32),('end',np.int32),('pvalue',np.float64)])
+    if opts.fixed_w is not None :
+        
+        all_peaks = np.array([(r['chr'],
+                          r['start']+r['summit']-opts.fixed_w/2.,
+                          r['start']+r['summit']+opts.fixed_w/2.,
+                          r['-10*log10(pvalue)']) for r in MACSFile(peaks_fn)],
+                          dtype=peaks_dt)
+    else :
+        all_peaks = np.array([(r['chr'],
+                           r['start'],
+                           r['end'],
+                           r['-10*log10(pvalue)']) for r in MACSFile(peaks_fn)],
+                           dtype=peaks_dt)
+
+    # -10*log10(pvalue) -> -log10(pvalue)
+    all_peaks[:]['pvalue'] /= 10.
+    peak_pvals = all_peaks[:]['pvalue']
+
+    # find the sorted order of peaks by descending pvalue
+    peak_pval_inds = peak_pvals.argsort()
+    peak_pval_inds = peak_pval_inds[::-1] # ascending -> descending
+    all_peaks = all_peaks[peak_pval_inds] # structured array is 1-D
+
+    # for pvalue vs motif score
+    pval_num_bins = 20
+    pval_bin_size = all_peaks[:]['pvalue'].size/pval_num_bins
+    # sample enough of each bin for ~100 sequences (capped at the whole bin), but at least 10%
+    sample_percent = max(min(1.,100./pval_bin_size),0.1)
+    pval_bin_memo = {}
+
+    if opts.top_n is not None :
+        peaks = all_peaks[0:opts.top_n]
+        peak_pvals = peak_pvals[peak_pval_inds][0:opts.top_n]
+    else :
+        peaks = all_peaks
+        peak_pvals = all_peaks[:]['pvalue'] # realign pvalues with the sorted peak order
+
+    # extract fasta sequences for these peaks
+    nibDb = NibDB(nib_dirs=get_org_settings(org)['genome_dir'])
+
+    """
+    # get the peak sequences
+    sys.stderr.write('Getting peak sequences\n')
+    fasta_batch = []
+    for i in range(peaks.size) :
+        fasta_batch.append((str(peaks[i]['chr']),int(peaks[i]['start']),int(peaks[i]['end']),'+'))
+    fg_fasta_headers, fg_fasta = nibDb.get_fasta_batch(fasta_batch)
+
+    # need a dict for background sampling
+    # headers have genome_dir and .nib in them, strip that out
+    sys.stderr.write('Converting nib output to dict\n')
+    fg_fasta_headers = list(fg_fasta_headers)
+    fg_fasta_dict = {}
+    for h,s in zip(fg_fasta_headers,fg_fasta) :
+        h = h.replace('>'+get_org_settings(org)['genome_dir']+'/','')
+        h = h.replace('.nib','')
+        if len(s) > 150 :
+            fg_fasta_dict[h] = s
+
+    # now sample the background sequences
+    sys.stderr.write('Sampling bg sequences (len(fg_fasta)==%d)\n'%(len(fg_fasta_dict)))
+    #bg_fasta_dict = rejection_sample_bg(fg_fasta_dict,org,bg_match_epsilon=1e-3,verbose=True)
+    bg_fasta_dict = {}
+    bg_fasta = bg_fasta_dict.values()
+    """
+
+    # load the motifs
+    sys.stderr.write('Loading motifs\n')
+    motifs = load(motif_fn)
+
+    if opts.motif_ind != 'all' :
+        motif_indices = [int(i) for i in opts.motif_ind.split(',') if len(i) != 0]
+        motifs = [motifs[i] for i in motif_indices]
+    else :
+        motif_indices = xrange(len(motifs))
+
+    # use all cores w/ a Pool
+    #pool = Pool(processes=opts.n_procs)
+
+    # go through each motif
+    job_params = []
+    res = []
+    #for i,m in zip(motif_indices,motifs) :
+    #    job_params.append((i,m,peak_pvals,fg_fasta,bg_fasta,opts.dir))
+    #seq_scores = pool.map(analyze_motif_sequences,job_params)
+
+    seq_scores = []
+    for m_i,m in zip(motif_indices,motifs) :
+
+        out_dir = opts.dir
+
+        try :
+            m_name = m.source.split('\t')[2]
+        except :
+            m_name = m.source.split()[0]
+
+        print 'starting',m_name
+
+        # pvalue vs motif score
+        pval_bin_bounds = []
+        pval_bin_pvals = []
+        pval_bin_ranges = np.arange(0,all_peaks[:]['pvalue'].size,pval_bin_size)
+        for st_i in pval_bin_ranges :
+
+            end_i = min(st_i+pval_bin_size,all_peaks[:]['pvalue'].size-1)
+            st_val = all_peaks[st_i]['pvalue']
+            end_val = all_peaks[end_i]['pvalue']
+
+            #print st_i, end_i, pval_bin_size, st_val, end_val
+
+            # keep track of the pvalue bounds of each bin
+            pval_bin_bounds.append((st_val,end_val))
+
+            # we sample sample_percent% of peaks in the bin to score
+            num_to_sample = int(sample_percent*(end_i-st_i))
+            inds_to_sample = random.sample(xrange(st_i,end_i),num_to_sample)
+
+            # we memoize the sequences we've seen before so we don't fetch seqs
+            # unnecessarily
+            unmemoed_inds_to_sample = set(inds_to_sample).difference(set(pval_bin_memo.keys()))
+
+            bin_fasta_batch = []
+            for peak_i in unmemoed_inds_to_sample :
+                bin_fasta_batch.append((str(all_peaks[peak_i]['chr']),
+                                        int(all_peaks[peak_i]['start']),
+                                        int(all_peaks[peak_i]['end']),
+                                        '+'))
+
+            if len(bin_fasta_batch) != 0 :
+                bin_headers, bin_seq = nibDb.get_fasta_batch(bin_fasta_batch)
+
+                for i, ind in enumerate(unmemoed_inds_to_sample) :
+                    pval_bin_memo[ind] = bin_seq[i].upper()
+
+            # score the sequences
+            pval_bin_pvals.append([])
+            for ind in inds_to_sample :
+                max_score = m.bestscan(pval_bin_memo[ind])
+                max_score = (max_score-m.minscore)/(m.maxscore-m.minscore)
+                pval_bin_pvals[-1].append(max_score)
+            pval_bin_pvals[-1] = np.array(pval_bin_pvals[-1])
+
+
+        mp.figure(figsize=(4,4))
+        font = {'size':'9'}
+        mp.rc('font',**font)
+
+        # box plot of the bins
+        mp.boxplot(pval_bin_pvals,positions=np.arange(len(pval_bin_pvals)))
+
+        # plot the means of the bins
+        #[(x[0]+x[1])/2. for x in pval_bin_bounds]
+        mp.plot(np.arange(len(pval_bin_pvals)),
+             [x.mean() for x in pval_bin_pvals],'bo')
+        mp.title('Sampled motif score vs binned peak pvalue')
+        mp.xlabel('Binned -log10(pvalue)')
+        mp.ylabel('Maximum normalized motif score')
+
+        img_fn = os.path.join(out_dir,m_name.translate(fn_trans)+'_%d_peakmot.png'%m_i)
+        mp.savefig(img_fn)
+        mp.clf()
+
+        continue # the foreground/background comparison below is currently disabled
+
+        fg_ratios = []
+        for seq in fg_fasta :
+            #max_score = score_sequence(seq,m)
+            max_score = m.bestscan(seq.upper())
+            fg_ratios.append((max_score-m.minscore)/(m.maxscore-m.minscore))
+        fg_ratios = np.array(fg_ratios)
+
+        bg_ratios = []
+        for seq in bg_fasta :
+            #max_score = score_sequence(seq,m)
+            max_score = m.bestscan(seq.upper())
+            bg_ratios.append((max_score-m.minscore)/(m.maxscore-m.minscore))
+        bg_ratios = np.array(bg_ratios)
+
+        fg_mean = sum(fg_ratios)/len(fg_ratios)
+        fg_std = np.std(fg_ratios)
+        bg_mean = sum(bg_ratios)/len(bg_ratios)
+        bg_std = np.std(bg_ratios)
+
+        m_mat = np.array((fg_ratios,bg_ratios,peak_pvals))
+        fg_score_sort_inds = m_mat[0,:].argsort()
+
+        motif_score_cnts, motif_score_bins = np.histogram(m_mat[0,:],bins=20)
+        binned_motif_scores = []
+        for st, end in zip(motif_score_bins[:-1],motif_score_bins[1:]) :
+            binned_motif_scores.append(m_mat[2,(m_mat[0,:]>=st)&(m_mat[0,:]<end)])
+
+        mp.figure(figsize=(4,4))
+        font = {'size':'9'}
+        mp.rc('font',**font)
+
+        mp.plot(fg_ratios,peak_pvals,'bo')
+
+        # calculate pearson correlation coefficient
+        pear_r, pear_pval = pearsonr(fg_ratios,peak_pvals)
+        mp.title('Max motif strength vs peak pvalue\n(r=%.2f,pval=%.2g)'%(pear_r,pear_pval))
+        img_fn = os.path.join(out_dir,m_name.translate(fn_trans)+'_%d_corr.png'%m_i)
+        mp.savefig(img_fn)
+        mp.clf()
+
+        # line plot of average peak p-value for binned motif score
+        mp.title('Average peak p-value for binned motif score\n%s'%m_name)
+        mp.xlabel('normalized motif score')
+        mp.ylabel('-log10(pvalue)')
+        mp.boxplot(binned_motif_scores,positions=np.arange(motif_score_bins.size-1),sym='')
+        p = mp.plot(np.arange(motif_score_bins.size-1),
+                [x.mean() for x in binned_motif_scores],
+                'bo',
+                label='Mean fg score')
+        p = p[0]
+
+        # draw a crosshair
+        bg_median_ind = np.argwhere(((motif_score_bins<=bg_mean)[:-1] & (motif_score_bins>=bg_mean)[1:])).ravel()[0]
+        bg_median = np.median(binned_motif_scores[bg_median_ind])
+        xlim, ylim = p.axes.get_xlim(), p.axes.get_ylim()
+        mp.plot([bg_median_ind,bg_median_ind],ylim,'k-',label='Mean bg score=%.2g'%m_mat[1,:].mean())
+        mp.plot(xlim,[bg_median,bg_median],'k-')
+        mp.xticks(np.arange(motif_score_bins.size)[1::5],['%.2f'%x for x in motif_score_bins[1::5]])
+        mp.legend(loc='upper left')
+
+        img_fn = os.path.join(out_dir,m_name.translate(fn_trans)+'_%d_peakmot.png'%m_i)
+        mp.savefig(img_fn)
+        mp.clf()
+
+        ret_d ={'m_name': m_name,
+                'fg_mean': fg_mean,
+                'fg_std': fg_std,
+                'bg_mean': bg_mean,
+                'bg_std': bg_std,
+                'fg_scores': fg_ratios,
+                'bg_scores': bg_ratios,
+                #'wmw_pval': WMWtest(fg_ratios,bg_ratios)
+               }
+
+        # binned pvalue vs sampled motif score
+        
+
+        print 'done with',m_name
+
+        seq_scores.append(ret_d)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/nibFrag.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,128 @@
+#!/usr/bin/env python
+# nibFrag.py - a python implementation of Jim Kent's nibFrag command line utility
+
+import sys
+import warnings
+from optparse import OptionParser, OptionGroup
+
+from chipsequtil import get_file_parts, BEDFile
+from chipsequtil.nib import get_nib_batch, validate_nib_file, NibException, NOMASK, MASK, HARDMASK
+
+usage = '%prog [options] file.nib start end strand [outfile]\n  -- or --\n%prog [options] --batch file.nib batchfile [batchfile ...]'
+description = """A python implementation of Jim Kent's nibFrag utility that allows outputting to \
+stdout.  Otherwise the functionality is identical for the non-batch usage.  Batch mode accepts \
+one or more files containing sets of coordinates to extract from the nib file.  Only BED formatting \
+is accepted at the moment. All sequences are concatenated together in FASTA format.  To retrieve the \
+entire sequence, use END as the end argument."""
+epilog="Note: When specifying --name optionin batch mode, also specify --dbHeader to ensure unique FASTA headers."
+parser = OptionParser(usage=usage,description=description,epilog=epilog)
+#parser.add_option('--output',dest='output',default=sys.stdout,help='filename to write output to [default: stdout]')
+parser.add_option('--no-header',dest='no_header',action='store_true',help='only output sequence (no fasta header)')
+parser.add_option('--wrap-width',dest='wrap_width',type='int',default=50,help='wrap output sequence at this number of bases, 0 indicates no wrap (sequence ends up on single line) [default: %default]')
+parser.add_option('--batch',dest='batch',action='store_true',help='run in batch mode, interpret arguments after nib file as queries')
+parser.add_option('--batch-format',dest='batch_format',type='choice',choices=['BED'],default='BED',help='format to interpret batch files [default: %default]')
+#parser.add_option('--mask-type',dest='mask_type',type='choice',choices=['NOMASK','MASK','HARDMASK'],default='NOMASK',help='how to handle masked positions, correspond to original nibFrag options --masked and --hardMasked [default: %default]')
+
+# original nibFrag usage:
+#nibFrag - Extract part of a nib file as .fa (all bases/gaps lower case by default)
+#usage:
+#   nibFrag [options] file.nib start end strand out.fa
+#where strand is + (plus) or m (minus)
+#options:
+#   -masked - use lower case characters for bases meant to be masked out
+#   -hardMasked - use upper case for not masked-out and 'N' characters for masked-out bases
+#   -upper - use upper case characters for all bases
+#   -name=name Use given name after '>' in output sequence
+#   -dbHeader=db Add full database info to the header, with or without -name option
+#   -tbaHeader=db Format header for compatibility with tba, takes database name as argument
+
+# original nibFrag options
+nibFrag_grp = OptionGroup(parser,"Original nibFrag options")
+nibFrag_grp.add_option('--masked',dest='masked',action='store_true',help='use lower case characters for bases meant to be masked out')
+nibFrag_grp.add_option('--hardMasked',dest='hardmasked',action='store_true',help='use upper case for non masked-out and \'N\' characters for masked-out bases')
+nibFrag_grp.add_option('--upper',dest='upper',action='store_true',help='use upper case characters for all bases')
+nibFrag_grp.add_option('--name',dest='name',default=None,help='Use given name after \'>\' in output sequence')
+nibFrag_grp.add_option('--dbHeader',dest='dbHeader',default=None,help='Add full database info to the header, with or without -name option')
+nibFrag_grp.add_option('--tbaHeader',dest='tbaHeader',default=None,help='Format header for compatibility with tba, takes database name as argument')
+parser.add_option_group(nibFrag_grp)
+
+
+if __name__ == '__main__' :
+
+    opts, args = parser.parse_args(sys.argv[1:])
+
+    if len(args) < 1 :
+        parser.print_usage()
+        parser.exit(1)
+
+    # setup
+    nib_path = args[0]
+    nib_dir,nib_fn,nib_base,nib_ext = get_file_parts(nib_path)
+
+    queries = []
+    if opts.batch :
+
+        if len(args) < 2 :
+            parser.error('Two arguments must be supplied in batch mode')
+
+        batch_fns = args[1:]
+
+        for fn in batch_fns :
+            if opts.batch_format == 'BED' :
+                for bed in BEDFile(fn) :
+                    if bed['chrom'] != nib_base :
+                        warnings.warn('Chromosome in BED line %s does not match file %s, skipping'%(bed['chrom'],nib_base))
+                    else :
+                        queries.append((int(bed['chromStart']),int(bed['chromEnd']),bed['strand']))
+    else :
+
+        if len(args) < 4 :
+            parser.error('Four arguments must be supplied in non-batch mode')
+
+        # setup
+        strand = args[3]
+        start, end = int(args[1]),args[2]
+        if end == 'END' :
+            end = -1
+        else :
+            end = int(end)
+            if end < start :
+                parser.error('Stop coordinate %d smaller than start %d'%(end,start))
+
+        queries.append((start,end,strand))
+
+    mask_type = NOMASK
+    if opts.masked :
+        mask_type = MASK
+    elif opts.hardmasked :
+        mask_type = HARDMASK
+
+    # set the output file
+    if len(args) > 4 :
+        out_f = open(args[4],'w')
+    else :
+        out_f = sys.stdout
+
+    # get the sequences from the .nib file
+    try :
+        headers, seqs = get_nib_batch(nib_path,queries,mask_type)
+    except NibException, e :
+        sys.stderr.write(e.message+'\n')
+        sys.exit(1)
+
+    nbases = validate_nib_file(nib_path)
+
+    # output all queries
+    for header, seq in zip(headers,seqs) :
+
+        # write output
+        out_f.write(header)
+
+        if opts.upper :
+            seq = seq.upper()
+        if opts.wrap_width == 0 :
+            out_f.write(seq+'\n')
+        else :
+            for i in xrange(0,len(seq),opts.wrap_width) :
+                out_f.write(seq[i:i+opts.wrap_width]+'\n')
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/org_settings.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,126 @@
+#!/usr/bin/env python
+
+import os
+import sys
+from optparse import OptionParser
+from ConfigParser import ConfigParser, NoSectionError
+from pprint import pformat
+
+from chipsequtil import get_org_settings, get_global_settings, get_all_settings, get_local_settings, GLOBAL_SETTINGS_FN, LOCAL_SETTINGS_FN
+
+usage = '%prog [options] [<org key> [<org setting>]]'
+description='''Tool for retrieving sets of organism-specific settings and paths.
+Original paths are set at install time, and can be overridden in the file
+~/.org_settings.cfg.  Allows output of settings in a variety of shell environment
+syntaxes.  The tool attempts to guess which shell environment is being used by
+examining the SHELL environment variable unless explicitly set.  When run without
+an argument, returns a listing of all settings available.
+'''
+parser = OptionParser(usage=usage,description=description)
+parser.add_option('-s','--syntax',dest='syntax',type='choice',\
+                  choices=['auto','python','bash','tcsh'],default='auto',help='syntax flavor \
+                  of output to produce [default: %default]')
+parser.add_option('-l','--list',dest='list_sets',action='store_true',help='print \
+                  all available settings for human consumption')
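+# Example (organism keys depend on the local configuration): print all settings
+# for an organism, or a single setting formatted for bash:
+#   org_settings.py mm9
+#   org_settings.py -s bash mm9 genome_dir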
+
+
+def obj_to_format(obj,format='python') :
+    '''Convert *obj* into a string that can be evaluated in the environment \
+    indicated in *format*.
+
+    obj -- a string, a dict of values, or a dict of dicts of values
+    format -- auto, python (default), sh, bash, zsh, csh, or tcsh
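+
+    For example (illustrative), {'genome_dir': '/data/nibs'} is rendered under
+    bash syntax as: export GENOME_DIR=/data/nibs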
+    '''
+
+    if format == 'auto' :
+        format = os.environ.get('SHELL','python').split('/')[-1]
+
+    r = ''
+    if format == 'python' :
+        r = pformat(obj)
+    elif format in ['sh','bash','zsh','csh','tcsh'] :
+        statements = []
+        if format in ['sh','bash','zsh'] :
+            export_tmpl = 'export %s=%s'
+        elif format in ['csh','tcsh'] :
+            export_tmpl = 'setenv %s %s'
+
+        # dict
+        if isinstance(obj,dict) :
+            for k1, v1 in obj.items() :
+                # dict of dicts
+                if isinstance(v1,dict) :
+                    # these should be literal values
+                    for k2, v2 in v1.items() :
+                        statements.append(export_tmpl%('_'.join([k1,k2]).upper(),\
+                                          str(v2)))
+                else :
+                    v1 = str(v1)
+                    # quote values containing spaces so they survive shell evaluation
+                    s = "'"+v1+"'" if ' ' in v1 else v1
+                    statements.append(export_tmpl%(k1.upper(),s))
+        else :
+            return str(obj)
+
+        r = '\n'.join(statements)
+
+    return r
+
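+# Example (illustrative; dict ordering is arbitrary in Python 2):
+#   obj_to_format({'genome_dir':'/data/nib','name':'mm9 genome'},'bash')
+#   returns: export GENOME_DIR=/data/nib
+#            export NAME='mm9 genome'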
+
+if __name__ == '__main__' :
+
+    opts, args = parser.parse_args(sys.argv[1:])
+
+    # output depends on number of arguments passed
+    output = ''
+
+    # return everything we know about
+    if len(args) == 0 :
+
+        if opts.list_sets :
+
+            # always use python formatting when listing
+            opts.syntax = 'python'
+
+            # global settings
+            settings = get_global_settings()
+            output = 'Global settings: (%s)\n'%GLOBAL_SETTINGS_FN
+            output += obj_to_format(settings,opts.syntax) + '\n'
+
+            # local settings
+            settings = get_local_settings()
+            output += 'Local settings: (%s)\n'%LOCAL_SETTINGS_FN
+            output += obj_to_format(settings,opts.syntax)
+        else :
+            settings = get_all_settings()
+            output += obj_to_format(settings,opts.syntax)
+
+
+    # return all records from the specific organism
+    elif len(args) in (1,2) :
+
+        # make sure our config files have the requested organism
+        try :
+            settings = get_org_settings(args[0])
+        except NoSectionError :
+            sys.stderr.write('No entry %s found, available:\n'%args[0]+\
+                             pformat(get_all_settings().keys())+'\nExiting\n')
+            sys.exit(1)
+
+        # return the requested field from the specific organism
+        if len(args) == 2 :
+
+            # make sure the config file has the setting for this organism
+            try :
+                output = obj_to_format(settings[args[1]],opts.syntax)
+            except KeyError :
+                sys.stderr.write('Setting %s not found for %s, choices:\n'%(args[1],args[0])+
+                                 pformat(settings.keys())+'\nExiting\n')
+                sys.exit(2)
+        else :
+            output = obj_to_format(settings,opts.syntax)
+    else :
+        parser.error('Provide zero, one, or two arguments, found %s'%args)
+
+    # bon voyage
+    sys.stdout.write(output+'\n')
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/peaks_to_fasta.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,144 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import textwrap
+import warnings
+from optparse import OptionParser
+
+from chipsequtil import BEDFile, MACSFile, get_file_parts, get_org_settings
+from chipsequtil.nib import NibDB
+from chipsequtil.sampling import rejection_sample_bg
+from chipsequtil.util import MultiLineHelpFormatter
+from chipsequtil.seq import write_fasta_to_file
+
+
+usage='%prog [options] <organism> <peak file> [<peak file> ...]'
+description='''Extract sequences for peaks in provided peak file(s).  Can \
+interpret MACS or BED output, determined automatically by .xls or .bed extensions \
+respectively (force explicit format with --peak-format option).  Outputs fasta \
+sequences for the peaks in all files extracted from the reference genome specified \
+by the output of *org_settings.py <organism> genome_dir* to stdout by default. \
+Chromosome names in peak files must match nib filenames without extension (e.g. \
+peak line: chr1 0  100 searches *genome_dir*/chr1.nib).  Fasta records have the \
+following format:
+
+><chromosome>:<start>-<end>;fn=<name of file>:<line number>;db_fn=<db filename>;fmt=<format>;<source alignment info>
+<sequence...>
+
+<db filename> is the filename where the sequence was extracted, <format> is the \
+format of the input file (MACS or BED), and <source alignment info> contains all \
+the fields from the originating alignment according to the source format.'''
+parser = OptionParser(usage=usage,description=description,formatter=MultiLineHelpFormatter())
+parser.add_option('--min-header',dest='min_header',action='store_true',help='only store <chromosome>:<start>-<end> in header')
+parser.add_option('--peak-format',dest='peak_format',type='choice',
+                  choices=['auto','MACS','BED'],default='auto',
+                  help='peak file format, \'auto\' determines format by extension, choices: MACS, BED, auto [default: %default]')
+parser.add_option('--output',dest='output',default=None,help='filename to output fasta records to [default: stdout]')
+parser.add_option('--fixed-peak-width',dest='fixed_peak_width',type='int',default=None,help='return a fixed number of bases flanking peak summit (*summit* field in MACS, (end-start)/2 in BED), ignoring start/stop coords [default: None]')
+parser.add_option('--wrap-width',dest='wrap_width',type='int',default=70,help='wrap fasta sequences to specified width. -1 indicates no wrap [default: %default]')
+
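+# Example (illustrative; 'mm9' is a hypothetical org key):
+#   $ peaks_to_fasta.py --fixed-peak-width=200 mm9 experiment_peaks.xls > peaks.fa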
+
+def bed_to_fasta(fn,db,min_header=False) :
+    #headers,seqs = db.get_fasta_from_bed(fn)
+    fastas = []
+    bed_recs = BEDFile(fn)
+    for i,rec in enumerate(bed_recs) :
+
+        if opts.fixed_peak_width :
+            # compute the midpoint in chromosome coordinates, not peak width
+            peak_start, peak_end = int(rec['chromStart']), int(rec['chromEnd'])
+            midpoint = peak_start+(peak_end-peak_start)/2
+            start = max(0,midpoint-opts.fixed_peak_width/2)
+            end = min(midpoint+opts.fixed_peak_width/2,db.db_info[rec['chrom']]['nbases'])
+            coords = start, end
+        else :
+            coords = start,end = int(rec['chromStart']), int(rec['chromEnd'])
+
+        seq = db.get_seq(rec['chrom'], start, end)
+        seq_fn = db.db_info[rec['chrom']]['path']
+
+        header = '%s:%s;'%(rec['chrom'],'%d-%d'%(start,end))
+        if not min_header :
+            header += 'fn=%s:%d;db_fn=%s;fmt=BED;'%(fn,i,seq_fn)+ \
+                     ';'.join(['%s=%s'%(k,str(v)) for k,v in rec.items()])
+        fastas.append((header,seq))
+
+    return fastas
+
+
+def macs_to_fasta(fn,db,min_header=False) :
+    macs_recs = MACSFile(fn)
+    fasta = []
+    for i,rec in enumerate(macs_recs) :
+
+        if opts.fixed_peak_width :
+            # adjust start and end peak position based on summit, ensuring we don't step outside of the reference sequence bounds
+            start = max(0, rec['start']+rec['summit']-opts.fixed_peak_width/2)
+            end = min(rec['start']+rec['summit']+opts.fixed_peak_width/2, db.db_info[rec['chr']]['nbases'])
+            coords = start, end
+        else :
+            start, end = coords = rec['start'], rec['end']
+
+        seq = db.get_seq(rec['chr'],start,end)
+        seq_fn = db.db_info[rec['chr']]['path']
+
+        header = '%s:%s'%(rec['chr'],'%d-%d'%coords)
+        if not min_header :
+            header += ';fn=%s:%d;db_fn=%s;fmt=MACS;'%(fn,i,seq_fn) + \
+                     ';'.join(['%s=%s'%(k,str(v)) for k,v in rec.items()])
+        fasta.append((header,seq))
+
+    return fasta
+
+
+if __name__ == '__main__' :
+
+    opts, args = parser.parse_args(sys.argv[1:])
+
+    if len(args) < 2 :
+        parser.error('Must provide at least two non-option arguments')
+
+    # instantiate the NibDB from the provided directory
+    organism = args[0]
+    nib_dir = get_org_settings(organism)['genome_dir']
+    nib_db = NibDB(nib_dirs=[nib_dir])
+
+    # determine specified format
+    peak_fmt = opts.peak_format
+
+    peak_fns = args[1:]
+
+    # determine if there is an output file
+    if opts.output :
+        out_f = open(opts.output,'w')
+    else :
+        out_f = sys.stdout
+
+    fasta_recs = []
+    for peak_fn in peak_fns :
+        # if --peak-format is auto, figure format out from extension
+        if opts.peak_format == 'auto' :
+            fnbase, fnext = os.path.splitext(peak_fn)
+            if fnext.lower() == '.bed' : # BED file
+                peak_fmt = 'BED'
+            elif fnext.lower() == '.xls' : # MACS file
+                peak_fmt = 'MACS'
+            else :
+                warnings.warn('Peak format specified as auto but file extension '
+                              'not recognized in file %s, skipping'%peak_fn)
+                continue
+
+        if peak_fmt == 'BED' :
+            fasta_recs.extend(bed_to_fasta(peak_fn,nib_db,min_header=opts.min_header))
+        elif peak_fmt == 'MACS' :
+            fasta_recs.extend(macs_to_fasta(peak_fn,nib_db,min_header=opts.min_header))
+
+    # write out foreground to file
+    if opts.output :
+        if opts.wrap_width == -1 :
+            opts.wrap_width = sys.maxint
+        write_fasta_to_file(dict(fasta_recs),opts.output,linelen=opts.wrap_width)
+    else :
+        for header, seq in fasta_recs :
+            if opts.wrap_width != -1 :
+                seq = textwrap.fill(seq,opts.wrap_width)
+            sys.stdout.write('>%s\n%s\n'%(header,seq))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/plot_peak_loc_dist.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,225 @@
+#!/usr/bin/env python
+
+import matplotlib
+matplotlib.use('AGG')
+
+import matplotlib.pyplot as mp
+import numpy as np
+import os
+import sys
+
+from collections import defaultdict
+from csv import reader, writer
+from optparse import OptionParser
+from StringIO import StringIO
+
+from chipsequtil import MACSFile, BEDFile
+
+
+usage = '%prog [options] <peaks fn> <gene list fn>'
+desc = """Produce a pie chart of the locations of peaks in different bins
+(promoter, gene, exon, intron, etc.) and, optionally, save the different
+records to their own files for subsequent analysis.  Also produce a histogram
+of distance from feature values in mapping file. Peaks file is expected
+to be as output by MACS, or alternately as a BED file but then the -b plot
+is not available.  Gene list file is expected to be in the format as
+output by peaks_to_known_genes.py script."""
+parser = OptionParser(usage=usage,description=desc)
+parser.add_option('-b','--bar-fn',dest='bar_fn',default=None,help='filename for pvalue stacked bar chart')
+parser.add_option('-g','--gene-pie-fn',dest='gene_pie_fn',default=None,help='filename for pie chart image')
+parser.add_option('-p','--peak-pie-fn',dest='peak_pie_fn',default=None,help='filename for pie chart image')
+parser.add_option('-f','--dist-fn',dest='dist_fn',default=None,help='filename for distance from feature image')
+parser.add_option('-s','--save',dest='save',action='store_true',help='write out files containing peaks for each category')
+parser.add_option('-d','--output-dir',dest='out_dir',default='.',help='output files created by --save option to this directory')
+parser.add_option('--no-plot',dest='no_plot',action='store_true',help='dont show (but save) the figure produced')
+parser.add_option('--peaks-format',dest='peak_fmt',type='choice',choices=['MACS','BED'],default='MACS',help='format of peaks file, either MACS or BED [default: MACS]')
+
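+# Example (illustrative):
+#   $ plot_peak_loc_dist.py -g gene_pie.png -p peak_pie.png peaks.xls peaks_genes.txt
+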
+GENE_FIELD_NAMES = ['knowngene_id','gene_symbol']
+LOC_FIELD_NAMES = ['peak_loc','dist_from_feature','score','map_type','map_subtype']
+int_or_none = lambda x: int(x) if x != '' else None
+float_or_none = lambda x: float(x) if x != '' else None
+LOC_FIELD_TYPES = [int_or_none,float_or_none,float_or_none,str,str]
+
+
+if __name__ == '__main__' :
+
+    opts, args = parser.parse_args(sys.argv[1:])
+
+    if len(args) != 2 :
+        parser.error('Exactly 2 non-option arguments are required')
+
+    peaks_fn, gene_fn = args
+
+    if opts.peak_fmt == 'BED' :
+        peaks_f = BEDFile(peaks_fn)
+    else :
+        peaks_f = MACSFile(peaks_fn)
+
+    gene_reader = reader(open(gene_fn),delimiter='\t')
+    gene_recs, macs_recs, loc_recs = [], [], []
+    gene_reader.next() # get rid of header
+
+    gene_field_cnt = len(GENE_FIELD_NAMES)
+    macs_field_cnt = len(MACSFile.FIELD_NAMES)
+    loc_field_cnt = len(LOC_FIELD_NAMES)
+    for rec in gene_reader :
+
+        gene_recs.append(dict(zip(GENE_FIELD_NAMES,rec[:gene_field_cnt])))
+
+        # this automatically coerces recs into correct format
+        macs_line = [f(x) for f,x in zip(MACSFile.FIELD_TYPES,rec[gene_field_cnt:gene_field_cnt+macs_field_cnt])]
+        macs_recs.append(dict(zip(MACSFile.FIELD_NAMES,macs_line)))
+
+        loc_line = [f(x) for f,x in zip(LOC_FIELD_TYPES,rec[gene_field_cnt+macs_field_cnt:])]
+        loc_recs.append(dict(zip(LOC_FIELD_NAMES,loc_line)))
+
+    loc_dist = defaultdict(int)
+    unique_peaks = defaultdict(set)
+    exon_scores, intron_scores = [], []
+    dist_to_features = defaultdict(list)
+    pvals = defaultdict(list)
+
+    fn_base, fn_ext = os.path.splitext(gene_fn)
+    if opts.save :
+        def get_writer(fn) :
+            fd = writer(open(fn,'w'),delimiter='\t')
+            header = MACSFile.FIELD_NAMES
+            if opts.peak_fmt == 'BED' :
+                header = BEDFile.FIELD_NAMES
+            fd.writerow(GENE_FIELD_NAMES+header+LOC_FIELD_NAMES)
+            return fd
+        fds = {}
+
+    for gene, peak, loc in zip(gene_recs, macs_recs, loc_recs) :
+        # weird case, not sure why this happens
+        if loc['map_subtype'] == '0' :
+            loc['map_subtype'] = ''
+        key = loc['map_type']+'_%s'%loc['map_subtype'] if loc['map_subtype'] != '' else loc['map_type']
+        loc_dist[key] += 1
+        dist_to_features[key].append(int(loc['dist_from_feature']))
+        if opts.peak_fmt == 'MACS' :
+            pvals[key].append(float(peak['-10*log10(pvalue)']))
+
+        map_key = '%s:%d-%d'%(peak['chr'],peak['start'],peak['end'])
+        unique_peaks[key].add(map_key)
+
+        if key == 'gene_exon' :
+            exon_scores.append(loc['score'])
+        elif key == 'gene_intron' :
+            intron_scores.append(loc['score'])
+
+        if opts.save :
+            row = [gene[f] for f in GENE_FIELD_NAMES] + \
+                  [peak[f] for f in MACSFile.FIELD_NAMES] + \
+                  [loc[f] for f in LOC_FIELD_NAMES]
+            if not fds.has_key(key) :
+                fn = os.path.join(opts.out_dir,fn_base+'_'+key+fn_ext)
+                fds[key] = get_writer(fn)
+            fds[key].writerow(row)
+
+    # now find which peaks are intergenic
+    intergenic = []
+    num_peaks = 0
+    all_unique_peaks = reduce(lambda x,y: x.union(y), unique_peaks.values())
+    for peak in peaks_f :
+        if opts.peak_fmt == 'BED' :
+            map_key = '%s:%d-%d'%(peak['chrom'],int(peak['chromStart']),int(peak['chromEnd']))
+        else :
+            map_key = '%s:%d-%d'%(peak['chr'],peak['start'],peak['end'])
+        if map_key not in all_unique_peaks :
+            unique_peaks['intergenic'].add(map_key)
+            intergenic.append(peak)
+            if opts.peak_fmt == 'MACS' :
+                pvals['intergenic'].append(peak['-10*log10(pvalue)'])
+        num_peaks += 1
+
+    num_int = len(intergenic)
+    loc_dist['intergenic'] = num_int
+    if opts.save :
+        fn = os.path.join(opts.out_dir,fn_base+'_intergenic.xls')
+        fd = writer(open(fn,'w'),delimiter='\t')
+        field_names = BEDFile.FIELD_NAMES if opts.peak_fmt == 'BED' else MACSFile.FIELD_NAMES
+        fd.writerow(field_names)
+        fd.writerows([[x[f] for f in field_names] for x in intergenic])
+
+    exon_scores, intron_scores = np.array(exon_scores), np.array(intron_scores)
+
+    font = {'size':'9'}
+    mp.rc('font',**font)
+    fig = mp.figure(figsize=(4,4))
+
+    bin_order = ('intergenic','gene_exon','gene_intron','promoter','after')
+    colors = 'bgrcm'
+
+    # pie chart
+    #pie_ax_rect = [0.1,0.35, 0.4125,  0.525 ] # left, bottom, width, height
+    pie_ax = fig.add_axes((0.15,0.15,0.7,0.7))
+    pie_ax.set_title('Gene map distribution\n%d peaks'%num_peaks)
+    pie_labels, pie_values = [], []
+    for k in bin_order :
+        pie_labels.append(k+'\n%d'%(len(unique_peaks[k])))
+        pie_values.append(len(unique_peaks[k]))
+    pie_ax.pie(pie_values,labels=pie_labels)
+
+    img_fn = fn_base+'_gene_loc.png' if opts.gene_pie_fn is None else opts.gene_pie_fn
+    mp.savefig(img_fn)
+    mp.clf()
+
+
+    fig = mp.figure(figsize=(4,4))
+    pie_ax = fig.add_axes((0.15,0.15,0.7,0.7))
+    pie_ax.set_title('Peak map distribution\n%d peaks'%num_peaks)
+    pie_labels, pie_values = [], []
+    for k in bin_order :
+        pie_labels.append(k+'\n%d'%(loc_dist[k]))
+        pie_values.append(loc_dist[k])
+    pie_ax.pie(pie_values,labels=pie_labels)
+
+    img_fn = fn_base+'_peak_loc.png' if opts.peak_pie_fn is None else opts.peak_pie_fn
+    mp.savefig(img_fn)
+    mp.clf()
+
+    fig = mp.figure(figsize=(4,4))
+    # dist to feature histogram
+    #hist_ax_rect = [0.65,0.45,0.25,0.45]
+    hist_ax = fig.add_axes((0.15,0.15,0.7,0.7))
+    hist_ax.set_title('Peak distance from TSS')
+    # join all the lists together
+    dists = sum(dist_to_features.values(),[])
+    pdf, bins, patches = hist_ax.hist(dists,bins=20)
+    #h = mp.hist(dists,bins=20)
+    hist_ax.set_xlim((int(min(dists)),int(max(dists))))
+
+    dist_fn = fn_base+'_dist.png' if opts.dist_fn is None else opts.dist_fn
+    mp.savefig(dist_fn)
+    mp.clf()
+
+    if opts.peak_fmt == 'MACS' :
+        fig = mp.figure(figsize=(4,4))
+        bar_ax = fig.add_axes((0.15,0.15,0.7,0.7))
+        pval_hists = {}
+        min_pval, max_pval = min([min(v) for v in pvals.values()]), max([max(v) for v in pvals.values()])
+        for key, key_pvals in pvals.items() :
+            vals, bins = np.histogram(key_pvals,range=(0,max_pval),bins=20)
+            lv = np.log10(vals)
+            # empty bins give log10(0) == -inf, replace with a small positive value
+            lv[np.isneginf(lv)] = 0.1
+            pval_hists[key] = lv
+
+        pval_items = [(k,pval_hists[k]) for k in bin_order if pval_hists.has_key(k)]
+        bar_width = 0.85*(max_pval-min_pval)/(len(bins)-1)
+        bars = []
+        b = bar_ax.bar(bins[:-1],pval_items[0][1],width=bar_width,color=colors[0])
+        bars.append(b)
+
+        sum_bottoms = pval_items[0][1]
+        for i, (key, key_hist) in enumerate(pval_items[1:]) :
+            b = bar_ax.bar(bins[:-1],key_hist,bottom=sum_bottoms,width=bar_width,color=colors[i+1])
+            bars.append(b)
+            sum_bottoms += key_hist
+        bar_ax.legend([b[0] for b in bars],[x[0] for x in pval_items])
+        bar_ax.axis((-10,max(bins),0,max(sum_bottoms)))
+        bar_ax.set_title('Peak map distribution by pvalue')
+        bar_ax.set_xlabel('-10*log10(pvalue)')
+        bar_ax.set_ylabel('relative log10(# peaks)')
+
+        pval_fn = fn_base+'_pval_bar.png' if opts.bar_fn is None else opts.bar_fn
+        mp.savefig(pval_fn)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/plot_pos_vs_neg_peaks.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+
+import os
+import sys
+
+import matplotlib
+matplotlib.use('AGG')
+
+from matplotlib.pyplot import *
+from numpy import arange, log10
+from optparse import OptionParser
+
+from chipsequtil import MACSFile
+
+usage = '%prog [options] <pos peaks fn> <neg peaks fn>'
+parser = OptionParser(usage=usage)
+parser.add_option('-o','--output',dest='out_fn',default=None,help='filename of output image')
+
+if __name__ == '__main__' :
+
+    opts, args = parser.parse_args(sys.argv[1:])
+    if len(args) != 2 :
+        parser.error('Exactly 2 non-option arguments are required')
+    pos_fn, neg_fn = args
+
+    pos_f, neg_f = MACSFile(pos_fn), MACSFile(neg_fn)
+
+    pos_peaks = []
+    pos_pvals = []
+    for pk in pos_f :
+        pos_pvals.append(float(pk['-10*log10(pvalue)'])/10.)
+        pos_peaks.append((pk['-10*log10(pvalue)'],pk))
+
+    pos_peaks.sort()
+
+    neg_peaks = []
+    neg_pvals = []
+    for pk in neg_f :
+        neg_pvals.append(float(pk['-10*log10(pvalue)'])/10.)
+        neg_peaks.append((pk['-10*log10(pvalue)'],pk))
+
+    neg_peaks.sort()
+
+    min_pval, max_pval = min(pos_pvals+neg_pvals), max(pos_pvals+neg_pvals)
+
+    pval_rng = arange(min_pval,max_pval,(max_pval-min_pval)/100.)
+
+    # construct cdfs
+    pos_cdf, neg_cdf = [], []
+    for pval in pval_rng :
+        pos_cdf.append(len(filter(lambda x: x >= pval,pos_pvals)))
+        neg_cdf.append(len(filter(lambda x: x >= pval,neg_pvals)))
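+    # e.g. pos_pvals=[1,2,3] with pval_rng thresholds [1,2,3] gives pos_cdf=[3,2,1]
+    # (a survival curve: the number of peaks at or above each threshold)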
+
+    # normalize cdfs
+    pos_cdf_norm = [1.*x/max(pos_cdf) for x in pos_cdf]
+    neg_cdf_norm = [1.*x/max(neg_cdf) for x in neg_cdf]
+
+    # log10 of peak counts above each pvalue threshold
+    pos_logs = map(log10,pos_cdf)
+    neg_logs = map(log10,neg_cdf)
+    plot(pval_rng,pos_logs)
+    plot(pval_rng,neg_logs)
+    ytics, ylabs = yticks()
+    clf()
+
+    # normalize logs for plotting
+    pos_logs_norm = [1.-x/max(pos_logs) for x in pos_logs]
+    neg_logs_norm = [1.-x/max(neg_logs) for x in neg_logs]
+
+    # calculate the pos:neg ratio at each pvalue threshold
+    pos_ratio = []
+    pos_only = []
+    for pos, neg in zip(pos_cdf,neg_cdf) :
+        if neg == 0 :
+            # no negative peaks remain above this threshold, carry the last ratio forward
+            pos_only.append(pos_ratio[-1] if pos_ratio else pos)
+        else :
+            pos_ratio.append(float(pos)/neg)
+
+    subplot(211)
+    plot(pval_rng, pos_logs, 'b-')
+    plot(pval_rng, neg_logs, 'g-')
+    yticks(ytics,[int(10**y) for y in ytics])
+    title('positive vs. negative peaks')
+    legend(('positive','negative'),loc='upper right')
+    xlabel('-log(p-value)')
+    ylabel('# Peaks')
+    axis('tight')
+
+    subplot(212)
+    plot(pval_rng[:len(pos_ratio)], map(log10,pos_ratio), 'k-')
+    plot(pval_rng[len(pos_ratio):], map(log10,pos_only),'k--')
+    #plot(pval_rng,pos_ratio, 'k-')
+    axis('tight')
+    xlabel('-log(p-value)')
+    #ylabel('# pos / (# pos + # neg)')
+    ylabel('log10(# pos / # neg)')
+
+    if opts.out_fn is None :
+        pos_base_fn, pos_fn_ext = os.path.splitext(pos_fn)
+        out_fn = '%s_pos_v_neg.png'%pos_base_fn
+    else :
+        out_fn = opts.out_fn
+    savefig(out_fn)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/probeset_to_known_gene.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,124 @@
+#!/usr/bin/env python
+
+import gzip
+import sys
+from collections import defaultdict as dd
+from csv import DictReader, DictWriter
+from optparse import OptionParser
+from sqlite3 import connect
+
+from chipsequtil import KnownGeneFile
+
+# TODO make these parameters?
+#affy_anno_fn = 'Mouse430A_2.na30.annot.csv'
+
+usage = '%prog [options] <knownGene annotation> <knownToMOE430 file> <knownGene Xref file> <microarray data file>'
+description = 'Maps probset data to knownGene database provided by UCSC. Probesets \
+that map to multiple knownGenes have one record per knownGene with duplicate data \
+otherwise.  Output is knownGene id prepended to each record in microarray data file.'
+parser = OptionParser(usage=usage,description=description)
+parser.add_option('--output',dest='output',default=None,help='file to output mapping to [default: stdout]')
+#parser.add_option('--symbol-xref',dest='symbol_xref',default=None,help='use the provided kgXref file to output gene symbols as second column')
+
+if __name__ == '__main__' :
+
+    opts, args = parser.parse_args(sys.argv[1:])
+
+    #affy_bioc_fn = 'microarray_analysis/cbfb_vector_BH_all.txt'
+    #knownToMOE_sql_fn = 'knownToMOE430.sql'
+    #knownToMOE_data_fn = 'knownToMOE430.txt'
+
+    if len(args) < 4 :
+        parser.error('Incorrect number of arguments provided')
+
+    known_gene_fn = args[0]
+    knownToMOE_data_fn = args[1]
+    Xref_fn = args[2]
+    affy_bioc_fn = args[3]
+
+    # affymetrix file from bioconductor
+    affy_bioc_f = open(affy_bioc_fn)
+    affy_bioc = {}
+    affy_bioc_reader = DictReader(affy_bioc_f,delimiter="\t")
+    for row in affy_bioc_reader :
+        affy_bioc[row['ID']] = row
+
+    # knownGene annotation
+    kg = KnownGeneFile(known_gene_fn)
+    kg_ids = dict([(x['name'],x) for x in kg])
+
+    # affy to knownGene
+    affy_to_kg_map = dd(list)
+    affy_to_kg_fields = ['kgID','affyID']
+    affy_to_kg_f = open(knownToMOE_data_fn)
+    kg_to_affy_map = dd(list)
+    for row in DictReader(affy_to_kg_f,fieldnames=affy_to_kg_fields,delimiter="\t") :
+        affy_to_kg_map[row['affyID'][2:]].append(row['kgID'])
+        kg_to_affy_map[row['kgID']].append(row['affyID'][2:])
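+    # affy_to_kg_map maps each probeset id to all knownGene ids it hits, e.g.
+    # {'1415670_at': ['uc007aeu.1']} (hypothetical ids); the first two characters
+    # of affyID (presumably a platform prefix) are stripped in both maps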
+
+    if opts.output :
+        out_f = open(opts.output,'w')
+    else :
+        out_f = sys.stdout
+
+    out_header = ['knownGeneID']+affy_bioc_reader.fieldnames
+
+    # see if the user wants gene symbols too
+    opts.symbol_xref = Xref_fn
+    if opts.symbol_xref :
+        kgXref_fieldnames = ['kgID','mRNA','spID','spDisplayID','geneSymbol','refseq','protAcc','description']
+        symbol_xref_reader = DictReader(open(opts.symbol_xref),fieldnames=kgXref_fieldnames,delimiter='\t')
+        symbol_xref_map = {}
+        for rec in symbol_xref_reader :
+            symbol_xref_map[rec['kgID']] = rec
+        out_header = ['knownGeneID','geneSymbol']+affy_bioc_reader.fieldnames
+
+    out_writer = DictWriter(out_f,delimiter='\t',fieldnames=out_header,lineterminator='\n')
+    out_writer.writerow(dict(zip(out_header,out_header)))
+    for probesetID, data in affy_bioc.items() :
+        kg_ids = affy_to_kg_map[probesetID]
+        for kg_id in kg_ids :
+            out_l = {'knownGeneID':kg_id}
+            if opts.symbol_xref :
+                out_l['geneSymbol'] = symbol_xref_map[kg_id]['geneSymbol']
+            out_l.update(data)
+            out_writer.writerow(out_l)
+
+    # figure out if any probesets map to non-overlapping loci
+    # dirty dirty dirty dirty
+    if False :
+        affy_id_loci = {}
+        for affyID, kgIDs in affy_to_kg_map.items() :
+            # check all pairwise kgIDs to make sure they all overlap in transcription start sites
+            kg_id_loci = dd(list)
+            for i, kgID1 in enumerate(kgIDs) :
+                kgID1_rec = kg_ids[kgID1]
+                kg_id_loci[kgID1].append(kgID1_rec)
+                for j, kgID2 in enumerate(kgIDs) :
+                    kgID2_rec = kg_ids[kgID2]
+                    # these are all gene overlap conditions
+                    #kg1Start = kgID1_rec['txEnd'] if kgID1_rec['strand'] == '-' else kgID1_rec['txStart']
+                    #kg1End = kgID1_rec['txStart'] if kgID1_rec['strand'] == '-' else kgID1_rec['txEnd']
+                    #kg2Start = kgID2_rec['txEnd'] if kgID2_rec['strand'] == '-' else kgID2_rec['txStart']
+                    #kg2End = kgID2_rec['txStart'] if kgID2_rec['strand'] == '-' else kgID2_rec['txEnd']
+                    kg1Start, kg1End = kgID1_rec['txStart'], kgID1_rec['txEnd']
+                    kg2Start, kg2End = kgID2_rec['txStart'], kgID2_rec['txEnd']
+                    if (kg2Start <= kg1Start <= kg2End or \
+                       kg1Start <= kg2Start <= kg1End or \
+                       (kg2Start < kg1Start and kg2End > kg1End) or \
+                       (kg1Start < kg2Start and kg1End > kg2End)) and \
+                       kgID1_rec['chrom'] == kgID2_rec['chrom'] and \
+                       i != j :
+                        # we have overlap
+                        pass
+                    elif i != j :
+                        # doesn't overlap oh noes
+                        kg_id_loci[kgID1].append(kgID2_rec)
+            for kg_id, kg_recs in kg_id_loci.items() :
+                if len(kg_recs) != 1 :
+                    affy_id_loci[affyID] = (kg_id, len(kg_recs),len(kgIDs),kg_recs,kgIDs)
+
+        if len(affy_id_loci) != 0 :
+            sys.stderr.write('Warning: %d probeset ids map to non-overlapping loci\n'%len(affy_id_loci))
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/rejection_sample_fasta.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+
+import sys
+
+from optparse import OptionParser
+
+from chipsequtil import check_org_settings
+from chipsequtil.util import MultiLineHelpFormatter
+from chipsequtil.sampling import rejection_sample_bg
+from chipsequtil.seq import fasta_to_dict, write_fasta_to_file
+
+usage = '%prog [options] <organism> <fasta file> [<fasta file> ... ]'
+description = """Use rejection sampling to generate a set of background/random \
+sequences matching the distance to nearest transcription start site, sequence \
+length, and GC content distributions of the input fasta file(s).  Generated \
+sequences are genomic sequences sampled based on these distributions. All sequences \
+from all files are used to generate the background sequences. The following \
+command must output a path to a nib genomic sequence directory and refGene \
+annotation, respectively:
+
+$> org_settings.py <organism> genome_dir
+$> org_settings.py <organism> refgene_anno_path
+
+Utility prints out generated fasta records to stdout by default.  Input sequences \
+from chr20 are mapped to chrX, chr21 are mapped to chrY, and sequences from chrM \
+are not used.
+"""
+epilog = "Note: the script only considers sequences with unique header names; only the last record among those with identical header names is used"
+parser = OptionParser(usage=usage,description=description,formatter=MultiLineHelpFormatter())
+parser.add_option('-n','--num-seqs',dest='num_seqs',default='1x', help='number of sequences to generate, either absolute number or factor of # input sequences, e.g. 2.5x for 2.5 times the # of input sequences [default: 1x]')
+parser.add_option('--output',dest='output',default=None,help='file to output fasta records to [default: stdout]')
+parser.add_option('--bed',dest='bed',action='store_true', help='also produce a BED formatted file representing sampled sequences')
+parser.add_option('--bed-output',dest='bed_output',default='output.bed',help='with --bed, file to output BED records to [default: %default]')
+parser.add_option('-v','--verbose',dest='verbose',action='store_true',help='print out debug information')
+
+if __name__ == '__main__' :
+
+    opts, args = parser.parse_args(sys.argv[1:])
+
+    if len(args) < 2 :
+        parser.error('Must provide at least 2 non-option arguments')
+
+    organism, fasta_fns = args[0], args[1:]
+
+    reqd_settings = ['genome_dir','refgene_anno_path']
+    if not check_org_settings(organism,reqd_settings) :
+        parser.error('The <organism> settings set must contain paths for %s'%reqd_settings)
+
+    # load up all the fasta records
+    fasta_recs = {}
+    for fasta_fn in fasta_fns :
+        fasta = fasta_to_dict(fasta_fn)
+        fasta_recs.update(fasta)
+
+    # parse --num-seqs argument
+    if opts.num_seqs.endswith('x') :
+        num_seq_factor = float(opts.num_seqs[:-1])
+        num_seqs = int(len(fasta_recs)*num_seq_factor)
+    else :
+        try :
+            num_seqs = int(opts.num_seqs)
+        except ValueError :
+            parser.error("Incorrect format of --num-seqs argument, must either be an integer or a factor ending with x, e.g. 2.5x")
+
+    # generate the sequences
+    gen_seqs = rejection_sample_bg(fasta_recs,organism,num_samples=num_seqs,verbose=opts.verbose)
+
+    # write out to file
+    if opts.output :
+        write_fasta_to_file(gen_seqs,opts.output)
+    else :
+        sys.stdout.write(''.join(['>%s\n%s\n'%(k,v) for k,v in gen_seqs.items()]))
+
+    if opts.bed :
+        bed_f = open(opts.bed_output,'w')
+        bed_f.write(''.join([k.replace(':','\t').replace('-','\t')+'\n' for k in gen_seqs.keys()]))
+        bed_f.close()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/sort_bed.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+import sys, os
+from optparse import OptionParser
+from collections import defaultdict as dd
+from csv import reader, writer
+
+
+usage = "%prog [options] <BED file> [<BED file> <BED file>...]"
+description = """\
+Sort the BED formatted files first by chromosome (field 1) and then by start
+coordinate (field 2).  Lines from all files submitted are concatenated and
+sorted in the final output."""
+parser = OptionParser(usage=usage,description=description)
+parser.add_option('--output',dest='output',default=sys.stdout,help='filename to write the sorted BED lines [default: stdout]')
+
+if __name__ == '__main__' :
+
+    opts, args = parser.parse_args(sys.argv[1:])
+
+    if len(args) == 0 :
+        parser.error("Must provide at least one file")
+
+    fns = args
+    chromos = dd(list)
+
+    # load each chromosome separately
+    for fn in fns :
+        bed_reader = reader(open(fn),delimiter='\t')
+        for line in bed_reader :
+            chromos[line[0]].append(line)
+
+    # determine where we're writing to
+    if opts.output != sys.stdout :
+        f = open(opts.output,'w')
+    else :
+        f = opts.output
+
+    # write the chromos in lexicographic sorted order
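+    # (note: lexicographic, not numeric, order -- e.g. chr10 sorts before chr2)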
+    bed_writer = writer(f,delimiter='\t')
+    for k in sorted(chromos.keys()) :
+
+        # sort each chromosome's BED lines by start position
+        chromos[k].sort(key=lambda x: int(x[1]))
+        bed_writer.writerows(chromos[k])
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/split_file.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+
+from optparse import OptionParser
+from datetime import datetime
+from subprocess import Popen, PIPE
+import itertools
+import sys, os, getpass, re
+
+usage = "[%prog] [options] filename"
+description = """\
+Split <filename> into a set of files with either a specific number of lines
+per file (--type=lines, default) or into a specific number of files
+(--type=count).  Files are created with .XXXX appended, indicating the number
+of the file split. Writes files to current working directory unless otherwise
+specified.
+
+parser = OptionParser(usage=usage,description=description)
+parser.add_option('--type',dest='split_type',type='choice',choices=['lines','count'],default='lines',help='how to split the file (WARNING: count does not preserve the sequence of lines in the original file when splitting) [default: %default]')
+#parser.add_option('--split-arg',dest='split_arg',default='1000',help='integer argument for split type (size specified as Xb, XK, XM, or XG, others are integers) [default: %default]')
+parser.add_option('--arg',dest='split_arg',type='int',default=1000,help='integer argument for split type [default: %default]')
+parser.add_option('--outdir',dest='outdir',default='.',help='directory to put the split files in [default: %default]')
+
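+# Example (illustrative):
+#   $ split_file.py --type=lines --arg=500000 reads.bed
+#   creates reads.bed.0000, reads.bed.0001, ... in the current directory
+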
+def get_file_parts(fn) :
+    fpath,fname = os.path.split(fn)
+    fbase,fext = os.path.splitext(fname)
+    return fpath,fname,fbase,fext
+
+if __name__ == '__main__' :
+
+    opts, args = parser.parse_args(sys.argv[1:])
+
+    if len(args) < 1 :
+        parser.print_usage()
+        sys.exit(1)
+
+    filename = args[0]
+    abs_filename = os.path.abspath(filename)
+
+    # check to ensure filename exists
+    if not os.path.exists(abs_filename) :
+        sys.stderr.write('File %s does not exist, exiting\n'%abs_filename)
+        parser.print_usage()
+        sys.exit(2)
+
+    # split the file
+    split_size = opts.split_arg
+    fpath,fname,fbase,fext = get_file_parts(abs_filename)
+    if opts.split_type == 'lines' :
+        curr_split = 0 # for first condition
+        split_fd = None
+        for i,l in enumerate(open(abs_filename)) :
+            if i%split_size == 0 :
+                if split_fd : split_fd.close() # close it if we aren't on the first split
+                split_fd = open(os.path.join(opts.outdir,fname)+'.%04d'%curr_split,'w')
+                curr_split += 1
+            split_fd.write(l)
+        nlines = i
+    elif opts.split_type == 'count' :
+        # create split_size split files by writing lines round robin
+        split_fds = [open(os.path.join(opts.outdir,fname)+'.%04d'%x,'w') for x in range(split_size)]
+        split_cycle = itertools.cycle(split_fds)
+        for i,l in enumerate(open(abs_filename)) :
+            split_cycle.next().write(l)
+        nlines = i
+
+        # close all the handles
+        [fd.close() for fd in split_fds]
+
+    elif opts.split_type == 'size' :
+        # NOTE: 'size' is not among the --type choices above, so this branch is
+        # currently unreachable and its implementation below is incomplete
+        # (--arg is also parsed as an int, so the X[bKMG] form would never match)
+        m = re.match('^(\d+)([bKMG])$',opts.split_arg)
+        if m is None :
+            sys.stderr.write("Incorrect --arg argument for --type=size, I understand only X[bKMG], exiting\n")
+            parser.print_usage()
+            sys.exit(3)
+        else :
+            size_d = {'b':1,'K':1024,'M':pow(1024,2),'G':pow(1024,3)}
+            split_size = int(m.groups()[0])*size_d[m.groups()[1]]
+
+        fd = open(abs_filename)
+        curr_split_size = 0
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/split_qsub.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+
+from __future__ import with_statement
+import os
+import sys
+from optparse import OptionParser
+from subprocess import Popen, PIPE
+
+from chipsequtil import get_file_parts
+
+usage = "[%prog] [options] <utility> <file> [<file> <file> ...]"
+description = """\
+Submit a job using qsub for <utility>, each with one <file> as an argument.  Any
+options specified on the command line that [%prog] cannot interpret are passed
+on to the utility for each call."""
+epilog = "Note: this script only works in Unix-style environments"
+parser = OptionParser(usage=usage,description=description,epilog=epilog)
+parser.add_option('--suffix',dest='suffix',default=None,help='string to append to stdout files, e.g. <filename>_<--suffix>.<--ext> [default: <utility>]')
+parser.add_option('--ext',dest='ext',default='.out',help='file extension to use for stdout files')
+parser.add_option('--util-args',dest='util_args',default='',help='double quote wrapped arguments to pass to <utility>')
+parser.add_option('--keep-stderr',dest='keep_stderr',action='store_true',help='capture stderr files, useful for debugging')
+parser.add_option('--keep-scripts',dest='keep_scripts',action='store_true',help='do not delete qsub scripts generated after job submission')
+parser.add_option('--die-on-error',dest='die_on_err',action='store_true',help='if any one of the qsub submissions returns non-zero exit status, stop executing')
+
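+# Example (illustrative; submits one qsub job per file):
+#   $ split_qsub.py gerald_stats.py export.txt.0000 export.txt.0001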
+
+if __name__ == '__main__' :
+
+    opts, args = parser.parse_args(sys.argv[1:])
+
+    if len(args) < 2 :
+        parser.error('Must provide a utility and at least one file')
+
+    utility, filenames = args[0], args[1:]
+
+    # try to find the utility
+    abs_utility = os.path.abspath(utility)
+    if not os.path.exists(abs_utility) :
+        # look on the path
+        abs_utility = Popen('which %s'%utility,shell=True,stdout=PIPE,stderr=PIPE).communicate()[0].strip()
+        if not os.path.exists(abs_utility) :
+            raise Exception("Utility %s could not be found in the local directory or on the user's path, exiting"%utility)
+            sys.exit(1)
+
+    upath,uname,ubase,uext = get_file_parts(abs_utility)
+
+    runscript_tmpl = """\
+#!/bin/bash
+
+#$ -N %(jobname)s
+#$ -S /bin/sh
+#$ -o %(stdout)s
+#$ -e %(stderr)s
+#$ -cwd
+export PYTHONPATH=%(pythonpath)s:${PYTHONPATH}
+
+%(utility)s %(utilargs)s %(filename)s"""
+
+    suffix = ubase if opts.suffix is None else opts.suffix
+    for fn in filenames :
+        abs_fn = os.path.abspath(fn)
+        fpath,fname,fbase,fext = get_file_parts(abs_fn)
+        stdout = os.path.join(fpath,fname+'_'+suffix+opts.ext)
+        stderr = '/dev/null' if not opts.keep_stderr else os.path.join(fpath,fname+'_'+suffix+'.err')
+        call_script = runscript_tmpl%{'jobname':fname,'utility':abs_utility,'filename':abs_fn,'stdout':stdout,'stderr':stderr,'utilargs':opts.util_args,'pythonpath':os.environ.get('PYTHONPATH','')}
+        f = open('%s'%abs_fn+'_'+utility+'.script','w')
+        f.write(call_script)
+        f.close()
+        p = Popen('qsub %s'%f.name,shell=True)
+        p.wait()
+        if not opts.keep_scripts :
+            os.remove(f.name)
+
+        if opts.die_on_err and p.returncode != 0 :
+            with open(stderr,'w') as f :
+                f.write('qsub returned non-zero exit code for file %s, aborting\n'%fn)
+            sys.exit(1)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/wait_for_jobid.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+
+import re
+import sys
+import time
+
+from optparse import OptionParser
+from subprocess import Popen, PIPE
+
+usage = '%prog [options] <job id> [<job id>...]'
+desc = 'Poll qstat and wait until all <job id>s are finished'
+parser = OptionParser(usage=usage,description=desc)
+
+array_job_match = '^(\d+)\[\]\.(.*)'
+array_job_regex = '^%s\[[0-9]\+\]'
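+# e.g. a TORQUE array job id looks like '12345[].server' (matched by
+# array_job_match); its tasks appear in 'qstat -t' output as '12345[0]',
+# '12345[1]', ..., which array_job_regex matches as a grep pattern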
+
+def is_job_done(jobid) :
+
+    done = False
+
+    # have to handle array jobs differently than standalone
+    array_match = re.search(array_job_match,jobid)
+    if array_match is not None :
+        idnum, rest = array_match.groups()
+        jobid_regex = array_job_regex%idnum
+        qstat_p = Popen('qstat -t | grep "%s" | cut -f 1 -d " "'%jobid_regex,shell=True,stdout=PIPE)
+        stdout, stderr = qstat_p.communicate()
+        done = len(stdout) == 0
+
+    else :
+        # -j is only for SGE
+        qstat_p = Popen('qstat -j %s'%jobid,shell=True,stdout=PIPE,stderr=PIPE)
+        qstat_p.wait()
+        # assume any non-zero return code means the job is done
+        done = qstat_p.returncode != 0
+
+    return done
+
+if __name__=='__main__':
+
+    opts, args = parser.parse_args(sys.argv[1:])
+
+    jobids = map(lambda x: x.strip(), args)
+
+    # wait for all of them
+    sys.stderr.write('Waiting for jobs to complete\n')
+    jobs_done = [False]*len(jobids)
+    try :
+        while not all(jobs_done) :
+            jobs_not_done = filter(lambda x: not x[1], enumerate(jobs_done))
+            for i, jid in jobs_not_done :
+                jobs_done[i] = is_job_done(jobids[i])
+            sys.stderr.write('Jobs done: %d/%d\r'%(sum(jobs_done),len(jobs_done)))
+            sys.stderr.flush()
+            time.sleep(2)
+    except KeyboardInterrupt :
+        sys.stderr.write('\n')
+        resp = raw_input('Caught keyboard interrupt, kill all jobs? [y/N] ')
+        if resp.lower() == 'y' :
+            Popen('kill_all_jobs.sh',shell=True)
+
+    sys.stderr.write('done\n')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/wait_for_qsub.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,14 @@
+#!/usr/bin/env python
+import time
+from subprocess import Popen, PIPE
+
+if __name__ == '__main__' :
+
+    # this is gross, but it works when you need to stall a pipeline until all your jobs are done
+    done = False
+    while not done :
+        qstat_output = Popen('qstat',shell=True,stdout=PIPE).communicate()[0]
+        if qstat_output == '' :
+            done = True
+        else :
+            time.sleep(1)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/wqsub.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,145 @@
+#!/usr/bin/env python
+
+from __future__ import with_statement
+import os
+import re
+import sys
+import time
+from optparse import OptionParser
+from subprocess import Popen, PIPE
+
+from chipsequtil import get_file_parts
+
+usage = "[%prog] [options] command"
+description = """Wrap the specified command into a qsub script and submit it
+for execution. Script captures both stdout and stderr to the current directory.
+By default, all of the user's environment variables are put into the script
+(compatible with SGE only ATM)."""
+epilog = "Note: this script only works in Unix-style environments."
+parser = OptionParser(usage=usage,description=description,epilog=epilog)
+parser.add_option('--wqsub-name',dest='wqsub_name',default='wqsub',help='job name to submit as <--wqsub-name>_<first non-whitespace chars in command> [default: %default]')
+parser.add_option('--wqsub-ext',dest='wqsub_ext',default='.out',help='file extension to use for stdout files')
+parser.add_option('--wqsub-keep-script',dest='wqsub_keep_script',action='store_true',help='do not delete qsub script generated after job submission')
+parser.add_option('--wqsub-no-env',dest='wqsub_no_env',action='store_true',help='do not include any local environment variables in the script')
+parser.add_option('--wqsub-no-submit',dest='wqsub_no_sub',action='store_true',help='create script but do not submit job (useful for generating scripts)')
+parser.add_option('--wqsub-drm',dest='drm',default='SGE',type='choice',choices=['SGE','TORQUE'],help='the DRM to generate scripts for [default: %default]')
+parser.add_option('--wqsub-drm-arg',dest='drm_args',action='append',default=[],help='arguments to pass as parameters in the job script specific to the DRM, use multiple option flags to specify multiple parameters')
+parser.add_option('--wqsub-wait',dest='wait',action='store_true',help='poll the DRM and do not return control until job is finished (only works for TORQUE)')
+
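+# Example (illustrative):
+#   $ wqsub.py --wqsub-name=align --wqsub-drm-arg="-l h_vmem=4G" bowtie -p 4 mm9_index reads.fq
+#   writes align_bowtie.script, submits it with qsub, and prints the job id
+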
+templates = {
+'TORQUE': """\
+#!/bin/bash
+
+#PBS -N %(jobname)s
+#PBS -o %(stdout)s
+#PBS -e %(stderr)s
+#PBS -d %(cwd)s
+%(env)s
+%(addnl)s
+
+%(command)s
+""",
+'SGE':"""\
+#!/bin/bash
+
+#$ -N %(jobname)s
+#$ -S /bin/bash
+#$ -o %(stdout)s
+#$ -e %(stderr)s
+#$ -cwd
+%(env)s
+%(addnl)s
+
+%(command)s
+"""
+}
+
+drm_symb = {
+'TORQUE': 'PBS',
+'SGE': '$'
+}
+
+if __name__ == '__main__' :
+
+    # get the wqsub args out first
+    wqsub_args = []
+    other_args = []
+    for arg in sys.argv :
+        if arg.count('wqsub') != 0 or arg in ['-h','--help'] :
+            wqsub_args.append(arg)
+        else :
+            other_args.append(arg)
+
+    opts, args = parser.parse_args(wqsub_args)
+
+    if len(other_args) == 0 :
+        parser.error('Must provide a command')
+
+    command = ' '.join(other_args)
+    runscript_tmpl = templates[opts.drm]
+    # set up job parameters
+    cmd_exe = os.path.basename(other_args[0])
+    jobname = opts.wqsub_name+'_'+cmd_exe
+    stdout_fn = jobname+opts.wqsub_ext
+    stdout = os.path.abspath(stdout_fn)
+    fpath,fname,fbase,fext = get_file_parts(stdout)
+    stderr = os.path.abspath(os.path.join(jobname+'.err'))
+
+    # get the user's current environment and put it into the execute script
+    if opts.wqsub_no_env :
+        env_str = '# local environment variables omitted'
+    else :
+        env_str = '#%s -V'%drm_symb[opts.drm]
+
+    # construct the script
+    addnl_params = []
+    for addnl in opts.drm_args :
+        addnl_params.append('#%s %s'%(drm_symb[opts.drm],addnl))
+    addnl_params = '\n'.join(addnl_params)
+
+    job_dict = {'jobname':jobname,
+                'stdout':stdout,
+                'stderr':stderr,
+                'command':command,
+                'env':env_str,
+                'cwd':os.getcwd(),
+                'addnl':addnl_params}
+
+    call_script = runscript_tmpl%job_dict
+    # write the script to file
+    script_fn = os.path.abspath(jobname+'.script')
+    with open(script_fn,'w') as f :
+        f.write(call_script)
+
+    if not opts.wqsub_no_sub :
+        p = Popen('qsub %s'%f.name,shell=True,stdout=PIPE)
+        stdout, stderr = p.communicate()
+        if not opts.wqsub_keep_script :
+            os.remove(f.name)
+        if opts.wait :
+            done = False
+            print 'Waiting on job id %s'%stdout.strip()
+            while not done :
+                qstat_p = Popen('qstat %s'%stdout.strip(),shell=True,stdout=PIPE,stderr=PIPE)
+                qstat_p.wait()
+                if opts.drm == 'TORQUE' :
+                    # TORQUE qstat exits with 153 for unknown/completed job ids
+                    done = qstat_p.returncode == 153
+                elif opts.drm == 'SGE' :
+                    done = qstat_p.returncode == 1
+                time.sleep(3) # wait three seconds because it's nice
+        else :
+            if opts.drm == 'TORQUE' :
+                print stdout.strip()
+            elif opts.drm == 'SGE' :
+                qsub_output_patt = 'Your job (\d+)'
+                m = re.match(qsub_output_patt,stdout.strip())
+                if m is not None:
+                    print m.group(1)
+                    sys.exit(0)
+
+                # might be an array job
+                qsub_output_patt = 'Your job-array (\d+)\.'
+                m = re.match(qsub_output_patt,stdout.strip())
+                if m is not None:
+                    print m.group(1)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/scripts/wqsub_drmaa.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+
+from __future__ import with_statement
+import os
+import sys
+from optparse import OptionParser
+from subprocess import Popen, PIPE
+
+import drmaa
+
+from chipsequtil import get_file_parts
+
+usage = "[%prog] [options] command"
+description = """Submit *command* to a DRMAA-enabled job queueing system.
+Output of the command goes to file, stderr is ignored unless specified 
+as an option.  By default, all of the user's environment
+variables are imported into job environment."""
+epilog = "Note: this script only works in Unix-style environments."
+parser = OptionParser(usage=usage,description=description,epilog=epilog)
+parser.add_option('--wqsub-name',dest='wqsub_name',default='wqsub',help='job name to submit as <--wqsub-name>_<first non-whitespace chars in command> [default: %default]')
+parser.add_option('--wqsub-stdout',dest='wqsub_stdout',default=None,help='name of file to write stdout to (equivalent to -o argument in SGE) [default: <wqsub-name>_<command>.out]')
+parser.add_option('--wqsub-stderr',dest='wqsub_stderr',default=None,help='name of file to write stderr to (equivalent to -e argument in SGE) [default: <wqsub-name>_<command>.err]')
+parser.add_option('--wqsub-join',dest='wqsub_join',action='store_true',help='join stdout and stderr into file indicated by --wqsub-stdout (equivalent to -j flag in SGE)')
+parser.add_option('--wqsub-no-env',dest='wqsub_no_env',action='store_true',help='do not include any local environment variables in the script')
+parser.add_option('--wqsub-wait',dest='wqsub_wait',action='store_true',help='wait for job to finish executing before returning from script')
+
+
+if __name__ == '__main__' :
+
+    # get the wqsub args out first
+    wqsub_args = []
+    other_args = []
+    for arg in sys.argv :
+        if arg.count('wqsub') != 0 or arg in ['-h','--help'] :
+            wqsub_args.append(arg)
+        else :
+            other_args.append(arg)
+
+    opts, args = parser.parse_args(wqsub_args)
+
+    if len(other_args) == 0 :
+        parser.error('Must provide a command')
+
+    # set up job parameters
+    jobname = opts.wqsub_name+'_'+other_args[0]
+    stdout_fn = jobname+'.out'
+    if opts.wqsub_stdout :
+        stdout_fn = opts.wqsub_stdout
+    stdout = os.path.abspath(stdout_fn)
+
+    if os.path.exists(stdout) :
+        os.remove(stdout)
+
+    stderr_fn = jobname+'.err'
+    if opts.wqsub_stderr :
+        stderr_fn = opts.wqsub_stderr
+    stderr = os.path.abspath(stderr_fn)
+    if os.path.exists(stderr) :
+        os.remove(stderr)
+
+    # drmaa job submission
+    session = drmaa.Session()
+    session.initialize()
+
+    # initialize job template
+    job_template = session.createJobTemplate()
+
+    # construct DRMAA job
+    command,args = other_args[0],other_args[1:]
+    job_template.remoteCommand = command
+    job_template.args = args
+    job_template.jobName = jobname
+    job_template.joinFiles = opts.wqsub_join
+
+    # DRMAA output/error paths take the form [hostname]:path, hence the leading ':'
+    job_template.outputPath = ':'+stdout
+    job_template.errorPath = ':'+stderr
+
+    # get the user's current environment and put it into the execute script
+    if not opts.wqsub_no_env :
+        job_template.jobEnvironment = os.environ
+
+    # submit the job and wait for it
+    jobid = session.runJob(job_template)
+
+    if opts.wqsub_wait :
+        # submit and wait for job to complete, keyboard interrupt aborts job
+        try :
+
+            retval = session.wait(jobid, drmaa.Session.TIMEOUT_WAIT_FOREVER)
+
+        except KeyboardInterrupt :
+            sys.stderr.write('Keyboard interrupt caught (^C), aborting job\n')
+            # terminate the running job so it does not linger after we exit
+            session.control(jobid,drmaa.JobControlAction.TERMINATE)
+
+    # clean up
+    session.deleteJobTemplate(job_template)
+    session.exit()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/setup.cfg	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,2 @@
+[install]
+prefix=~/arch/univ
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/setup.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+
+import os
+import sys
+
+from distutils.core import setup
+#from ez_setup import use_setuptools
+#use_setuptools()
+#from setuptools import setup
+
+# convenience is king
+opj = os.path.join
+
+# make sure org_settings.cfg is in source directory
+org_settings_fn = 'org_settings.cfg'
+dist_settings_path = opj(os.getcwd(),'src','chipsequtil',org_settings_fn)
+if not os.path.exists(dist_settings_path) :
+    sys.stderr.write('WARNING: %s could not be found in distribution root '
+                     'directory.  org_settings.py script may not work '
+                     'properly.\n'%dist_settings_path)
+
+scripts = ['scripts/build_chipseq_infosite.py',
+           'scripts/chipseq_pipeline.py',
+           'scripts/combine_gerald_stats.py',
+           'scripts/compare_microarray_binding.py',
+           'scripts/create_pipeline_script.py',
+           'scripts/extract_promoters.py',
+           'scripts/filter_bed_by_position_count.py',
+           'scripts/filter_macs_peaks.py',
+           'scripts/filter_gps_peaks.py',
+           'scripts/filter_mapped_known_genes.py',
+           'scripts/generate_stats_doc.py',
+           'scripts/gerald_stats.py',
+           'scripts/gerald_to_bed.py',
+           'scripts/integrate_macs_ucsc.py',
+           'scripts/join_mapped_known_genes.py',
+           'scripts/map_intervals.py',
+           'scripts/map_peaks_to_genes.py',
+           'scripts/map_peaks_to_known_genes.py',
+           'scripts/motif_scan.py',
+           'scripts/nibFrag.py',
+           'scripts/org_settings.py',
+           'scripts/peaks_to_fasta.py',
+           'scripts/plot_pos_vs_neg_peaks.py',
+           'scripts/plot_peak_loc_dist.py',
+           'scripts/probeset_to_known_gene.py',
+           'scripts/rejection_sample_fasta.py',
+           'scripts/sort_bed.py',
+           'scripts/split_file.py',
+           'scripts/split_qsub.py',
+           'scripts/THEME.sh',
+           'scripts/wait_for_qsub.py',
+           'scripts/wait_for_jobid.py',
+           'scripts/wqsub.py',
+           'scripts/wqsub_drmaa.py',
+           ]
+
+# setup and install
+setup(name='chipsequtil',
+      version='0.5',
+      author='Adam Labadorf',
+      author_email='alabadorf@gmail.com',
+      package_dir={'':'src'},
+      py_modules=['chipsequtil.nib','chipsequtil.util','chipsequtil.plotting',
+                  'chipsequtil.sampling','chipsequtil.seq'],
+      packages=['chipsequtil'],
+      package_data={'': ['org_settings.cfg']},
+      scripts=scripts,
+      #cmdclass={'uninstall': uninstall},
+     )
Binary file chipsequtil-master/src/._chipsequtil has changed
Binary file chipsequtil-master/src/chipsequtil/.___init__.py has changed
Binary file chipsequtil-master/src/chipsequtil/._chipsequtil.py has changed
Binary file chipsequtil-master/src/chipsequtil/._motiftools.py has changed
Binary file chipsequtil-master/src/chipsequtil/._nib.py has changed
Binary file chipsequtil-master/src/chipsequtil/._plotting.py has changed
Binary file chipsequtil-master/src/chipsequtil/._sampling.py has changed
Binary file chipsequtil-master/src/chipsequtil/._seq.py has changed
Binary file chipsequtil-master/src/chipsequtil/._util.py has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/src/chipsequtil/__init__.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,5 @@
+"""
+This module needs documentation.
+"""
+
+from chipsequtil import *
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/src/chipsequtil/chipsequtil.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,718 @@
+import math
+import os
+import re
+import string
+import sys
+
+from ConfigParser import ConfigParser
+from csv import DictReader
+from collections import defaultdict
+
+import chipsequtil
+
+# for RefGeneDB
+from util import KeyedBinaryTree
+
+
+def get_file_parts(path) :
+    """For <path>/<basename>.<ext>, returns 4-tuple (<path>,<basename>.<ext>,<basename>,<ext>)"""
+    path,fn = os.path.split(path)
+    basename,ext = os.path.splitext(fn)
+    return path,fn,basename,ext
+
+def parse_number(n) :
+    """Try to cast intput first to float, then int, returning unchanged if both fail"""
+    try :
+        return float(n) if '.' in n else int(n)
+    except :
+        return n
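+
+# A few illustrative calls (doctest-style sketch):
+#   >>> parse_number('42')
+#   42
+#   >>> parse_number('3.14')
+#   3.14
+#   >>> parse_number('chr1')
+#   'chr1'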
+
+
+def gerald_to_bed(gerald,min_fields=False) :
+    """Convert a GERALDOutput object into a BEDOutput object
+
+    Keyword argument *min_fields* produces BED alignment with only the first 
+    three fields populated
+    """
+
+    d = {}.fromkeys(BEDOutput.FIELD_NAMES,'')
+
+    # required BED fields
+    d['chrom'] = gerald.match_chromo
+    d['chromStart'] = gerald.match_pos
+    d['chromEnd'] = gerald.match_pos+len(gerald.read)
+
+    # load the remaining information
+    if not min_fields :
+        d['strand'] = '+' if gerald.match_strand == 'F' else '-'
+        # TODO consider encoding single-read alignment score into BED score format
+        # that's it?
+    return BEDOutput(**d)
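+
+# Conversion sketch ('gerald_line' stands in for one 22-field GERALD record):
+#   >>> bed = gerald_to_bed(GERALDOutput(gerald_line), min_fields=True)
+#   >>> bed.chrom, bed.chromStart, bed.chromEnd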
+
+
+class GERALDOutput :
+    """Container for one line of GERALD alignment output as generated by Illumina
+    pipeline version >= 1.3."""
+
+    FIELD_NAMES = ['machine',
+                   'run_number',
+                   'lane',
+                   'tile',
+                   'x_coord',
+                   'y_coord',
+                   'index',
+                   'read_no',
+                   'read',
+                   'quality_string',
+                   'match_chromo',
+                   'match_contig',
+                   'match_pos',
+                   'match_strand',
+                   'match_desc',
+                   'single_read_score',
+                   'paired_read_score',
+                   'partner_chromo',
+                   'partner_contig',
+                   'partner_offset',
+                   'partner_strand',
+                   'filtering',
+                   ]
+
+    def __init__(self,line) :
+
+        if type(line) == str :
+            line = line.strip().split('\t')
+
+        if len(line) != len(GERALDOutput.FIELD_NAMES) :
+            raise GERALDOutput.FormatException('Expected %d fields in input, \
+                                               found %d in line: %s'%
+                                               (len(GERALDOutput.FIELD_NAMES),
+                                                len(line),
+                                                line))
+
+        for fn,d in zip(GERALDOutput.FIELD_NAMES,line) :
+            setattr(self,fn,parse_number(d))
+
+    def __repr__(self) :
+        return 'GERALDOutput(%s)'%repr(self.output_format())
+
+    def output_format(self) :
+        """Tab delimited string of fields as they would appear in GERALD output file"""
+        return '\t'.join([str(getattr(self,d)) for d in GERALDOutput.FIELD_NAMES])+'\n'
+
+    class FormatException(Exception) :
+        """GERALD format exception, raised on malformatted input"""
+        pass
+
+
+class SmartFileIter :
+    r"""An 'abstract' class implementing a smart file iterator.  It is essentially
+    a wrapper around a csv.DictReader object that parses fields into
+    Python datatypes (int, float, tuple, objects, etc) as they are iterated.
+    The constructor argument *f* can be either a valid filename or a file-like
+    object.  This class should not be directly instantiated - rather it should
+    be subclassed with FIELD_NAMES and FIELD_TYPES defined.  FIELD_NAMES is a
+    list of strings referring to the names of the fields, FIELD_TYPES is a list
+    of the same length of callables that will parse the column into the desired
+    format. Example::
+    
+      >>> from StringIO import StringIO
+      >>> s = StringIO('chr1\t0\t100\t+\nchr3\t300\t601\t-\n')
+      >>> class IntervalFile(SmartFileIter):
+              r'''A SmartFileIter for files with lines formatted like:
+                    chrom\tstart\tend\tstrand'''
+              FIELD_NAMES = ['chrom','start','end','strand']
+              FIELD_TYPES= [str,int,int,lambda x: 0 if x == '+' else 1]
+      >>> f = IntervalFile(s)
+      >>> for r in f :
+              print r['chrom'], 'length: ', r['end']-r['start'], 'strand: ',r['strand']
+
+    ``r['start']`` and ``r['end']`` are automatically available as integers,
+    so the subtraction works as expected.  Arbitrary functions that accept a
+    single argument and return a value may also be specified.
+    """
+
+    def __init__(self,f,skip_line_chars='#') :
+        if not hasattr(self,'FIELD_NAMES') or not hasattr(self,'FIELD_TYPES') :
+            raise Exception('Subclasses must define class members FIELD_NAMES and FIELD_TYPES')
+        if isinstance(f,str) :
+            f = open(f)
+        self._dict_reader = DictReader(f,delimiter='\t',fieldnames=self.FIELD_NAMES)
+        self.fieldnames = self.FIELD_NAMES
+        self.curr_line = self._dict_reader.next()
+        self.skip_line_chars = skip_line_chars
+
+        # skip initial comment lines
+        while self.curr_line[self.FIELD_NAMES[0]][0] in self.skip_line_chars :
+            self.curr_line = self._dict_reader.next()
+
+        if self.FIELD_NAMES[0] in self.curr_line.values() :
+            self.curr_line = self._dict_reader.next()
+
+    def __iter__(self) :
+        return self
+
+    def __getattr__(self,attr) :
+        try:
+            return self.__dict__[attr]
+        except KeyError :
+            return getattr(self._dict_reader,attr)
+
+    def next(self) :
+        """Emit the next record in the file as a dictionary with parsed values"""
+
+        if self.curr_line is None :
+            raise StopIteration()
+
+        line = self.curr_line
+
+        # check for comment
+        while line[self.FIELD_NAMES[0]][0] in self.skip_line_chars :
+            line = self.curr_line = self._dict_reader.next()
+
+        for k,f in zip(self.FIELD_NAMES, self.FIELD_TYPES) :
+            try :
+                line[k] = f(line[k])
+            except Exception, e :
+                # leave the raw string value in place if the cast fails
+                #sys.stderr.write('Warning: field %s on line %d could not be properly formatted, exception %s\n'%(k,self._dict_reader.reader.line_num,str(e)))
+                pass
+
+        try :
+            self.curr_line = self._dict_reader.next()
+        except StopIteration :
+            self.curr_line = None
+
+        return line
+
+
+class BEDOutput :
+    """*Deprecated*: Use *BEDFile* instead.
+    
+    Container for one line of BED alignment output"""
+
+    FIELD_NAMES = ['chrom',
+                   'chromStart',
+                   'chromEnd',
+                   'name',
+                   'score',
+                   'strand',
+                   'thickStart',
+                   'thickEnd',
+                   'itemRgb',
+                   'blockCount',
+                   'blockSizes',
+                   'blockStarts',
+                   ]
+
+    def __init__(self,line='',*args,**kwargs) :
+
+        if type(line) == str :
+            line = line.strip().split('\t')
+
+        if len(line) < 3 and any([x not in kwargs.keys() for x in ['chrom','chromStart','chromEnd']]) :
+            raise BEDOutput.FormatException('Format requires at least 3 fields in \
+                                            input, found %d in line: %s'%(len(line),line))
+        if len(line) > len(BEDOutput.FIELD_NAMES) :
+            raise BEDOutput.FormatException('Format requires at most %d fields in \
+                                             input, found %d in line: %s'%
+                                             (len(BEDOutput.FIELD_NAMES),len(line),line))
+
+        empty_fields = ['']*(len(BEDOutput.FIELD_NAMES)-len(line))
+        for fn,d in zip(BEDOutput.FIELD_NAMES,line+empty_fields) :
+            setattr(self,fn,parse_number(d))
+
+        # kwargs override line input
+        for k,v in kwargs.items() :
+            setattr(self,k,parse_number(v))
+
+    def __repr__(self) :
+        return 'BEDOutput(%s)'%(repr(self.output_format()))
+
+    def output_format(self) :
+        """Returns a string for the BED line as it would appear in a file"""
+        return '\t'.join([str(getattr(self,d)) for d in BEDOutput.FIELD_NAMES])+'\n'
+
+    class FormatException(Exception) :
+        """BED format exception, raised on malformatted input"""
+        pass
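+
+# Construction sketch: BEDOutput accepts a raw tab-delimited line, keyword
+# fields, or both (kwargs override the line):
+#   >>> b = BEDOutput('chr1\t100\t200\tpeak1')
+#   >>> b.chromStart, b.chromEnd
+#   (100, 200)
+#   >>> b = BEDOutput(chrom='chr2', chromStart=5, chromEnd=50)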
+
+
+class BEDFile(SmartFileIter) :
+    '''An iterable object containing the records in the supplied BED formatted 
+    file.  Fieldnames are::
+
+        FIELD_NAMES = ['chrom',
+                       'chromStart',
+                       'chromEnd',
+                       'name',
+                       'score',
+                       'strand',
+                       'thickStart',
+                       'thickEnd',
+                       'itemRgb',
+                       'blockCount',
+                       'blockSizes',
+                       'blockStarts',
+                       ]
+    '''
+
+    FIELD_NAMES = BEDOutput.FIELD_NAMES
+    # blockCount is a plain integer; blockSizes/blockStarts are comma-separated lists
+    FIELD_TYPES = [str,int,int,str,float,str,int,int,str,int,
+                   lambda x: x.split(','), lambda x: x.split(',')]
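+
+# Iteration sketch ('peaks.bed' is a hypothetical path); numeric fields come
+# back already parsed per FIELD_TYPES:
+#   >>> for rec in BEDFile('peaks.bed') :
+#   ...     print rec['chrom'], rec['chromEnd']-rec['chromStart']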
+
+
+class BEDFile_dictreader(DictReader) :
+    '''An iterable object (subclasses csv.DictReader) containing the records in
+    the supplied BED formatted file.'''
+    FIELD_NAMES = BEDOutput.FIELD_NAMES
+    def __init__(self,bed) :
+        '''*bed* is either a filename or a file-like object representing a BED file'''
+        if isinstance(bed,str) :
+            bed = open(bed)
+        DictReader.__init__(self,bed,delimiter='\t',
+                            fieldnames=BEDOutput.FIELD_NAMES)
+
+
+class GPSFile(SmartFileIter) :
+    '''An iterable object containing the records in the peaks file format
+    generated by GPS. Fieldnames are::
+
+        FIELD_NAMES = ["Position",
+                       "IP",
+                       "Control",
+                       "Fold",
+                       "Q_-lg10",
+                       "P_-lg10",
+                       "IPvsEMP",
+                       "IPvsCTR",
+                       "blank"
+                      ]
+    '''
+
+    FIELD_NAMES = ["Position",
+                   "IP",
+                   "Control",
+                   "Fold",
+                   "Q_-lg10",
+                   "P_-lg10",
+                   "IPvsEMP",
+                   "IPvsCTR",
+                   "blank"
+                  ]
+
+    FIELD_TYPES = [lambda x: ('chr%s'%x.split(':')[0],int(x.split(':')[1]),x),
+                   float,
+                   float,
+                   float,
+                   float,
+                   float,
+                   float,
+                   float,
+                   str
+                  ]
+
+    def __init__(self,gps_fn) :
+        f = open(gps_fn)
+
+        SmartFileIter.__init__(self,f)
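+
+# Note: GPS reports 'Position' as e.g. '1:4483447'; the first FIELD_TYPE above
+# parses it into a ('chr1', 4483447, '1:4483447') tuple:
+#   >>> peak = GPSFile('gps_peaks.txt').next()   # filename illustrative
+#   >>> chrom, coord, raw = peak['Position']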
+
+
+class AffyBiocFile(DictReader) :
+    '''An iterable object (subclasses csv.DictReader) containing microarray data records in
+    the supplied bioconductor formatted file.'''
+
+    FIELD_NAMES = [ 'ID',
+                    'Symbol',
+                    'Name',
+                    'M',
+                    'A',
+                    't',
+                    'P.Value',
+                    'B'
+                  ]
+
+    def __init__(self,affyfn) :
+        '''*affyfn* is either a filename or a file-like object representing a bioconductor output file'''
+        if isinstance(affyfn,str) :
+            affyfn = open(affyfn)
+        DictReader.__init__(self,affyfn,delimiter='\t',
+                            fieldnames=AffyBiocFile.FIELD_NAMES)
+
+
+class RefGeneOutput(object) :
+    # http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/refGene.sql
+    FIELD_NAMES = ['bin',
+                   'name',
+                   'chrom',
+                   'strand',
+                   'txStart',
+                   'txEnd',
+                   'cdsStart',
+                   'cdsEnd',
+                   'exonCount',
+                   'exonStarts',
+                   'exonEnds',
+                   'score',
+                   'name2',
+                   'cdsStartStat',
+                   'cdsEndStat',
+                   'exonFrames',]
+
+
+class RefGeneFile(DictReader) :
+    '''An iterable object (subclasses csv.DictReader) containing the records in
+    the supplied refGene-formatted annotation file'''
+    def __init__(self,refGene_fn) :
+        refGene_f = open(refGene_fn)
+        # check for header
+        first_line = refGene_f.next()
+        if not first_line.strip().startswith('#') :
+            refGene_f.seek(0) # first line not header, reset the file pointer
+        DictReader.__init__(self,refGene_f,delimiter='\t',fieldnames=RefGeneOutput.FIELD_NAMES)
+
+class RefGeneFile_nottested(SmartFileIter) :
+    '''An iterable object containing the records in the supplied UCSC RefGene 
+    refFlat formatted file (see e.g. 
+    http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/refGene.sql)'''
+    FIELD_NAMES = ['bin',
+                   'name',
+                   'chrom',
+                   'strand',
+                   'txStart',
+                   'txEnd',
+                   'cdsStart',
+                   'cdsEnd',
+                   'exonCount',
+                   'exonStarts',
+                   'exonEnds',
+                   'score',
+                   'name2',
+                   'cdsStartStat',
+                   'cdsEndStat',
+                   'exonFrames',]
+    FIELD_TYPES = [str,str,str,str,int,int,int,int,int,
+                   lambda x: [int(y) for y in x.split(',') if len(y) > 0],
+                   lambda x: [int(y) for y in x.split(',') if len(y) > 0],
+                   float,
+                   str,str,str,str]
+
+class KnownGeneFile(SmartFileIter) :
+    '''An iterable that parses UCSC's KnownGene gene annotation files.  Field 
+    names are::
+
+        FIELD_NAMES = [ 'name',
+                        'chrom',
+                        'strand',
+                        'txStart',
+                        'txEnd',
+                        'cdsStart',
+                        'cdsEnd',
+                        'exonCount',
+                        'exonStarts',
+                        'exonEnds',
+                        'proteinID',
+                        'alignID',
+                      ]
+'''
+
+    FIELD_NAMES = [ 'name',
+                    'chrom',
+                    'strand',
+                    'txStart',
+                    'txEnd',
+                    'cdsStart',
+                    'cdsEnd',
+                    'exonCount',
+                    'exonStarts',
+                    'exonEnds',
+                    'proteinID',
+                    'alignID',
+                  ]
+
+    # function pointers for correct formatting of field names
+    FIELD_TYPES = [ str,
+                    str,
+                    str,
+                    int,
+                    int,
+                    int,
+                    int,
+                    int,  # exonCount is a plain integer
+                    lambda x: [int(y) for y in x.split(',') if len(y) > 0],
+                    lambda x: [int(y) for y in x.split(',') if len(y) > 0],
+                    str,
+                    str,
+                  ]
+
+    def __init__(self,kg_fn) :
+        self.meta_data = []
+        self.file_info = {}
+        f = open(kg_fn)
+        self._dict_reader = DictReader(f,delimiter='\t',fieldnames=KnownGeneFile.FIELD_NAMES)
+
+    def __iter__(self) :
+        return self
+
+    def next(self) :
+        line = self._dict_reader.next()
+        for k,f in zip(self.FIELD_NAMES,self.FIELD_TYPES) :
+            line[k] = f(line[k])
+        return line
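+
+# Usage sketch (the annotation path is illustrative):
+#   >>> for gene in KnownGeneFile('knownGene-mm9.txt') :
+#   ...     print gene['name'], gene['txEnd']-gene['txStart'], len(gene['exonStarts'])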
+
+
+#TODO maybe, finish this
+class RefGeneDB :
+    '''A class for querying RefGene annotation files. NOT DONE.'''
+
+    def __init__(self,refgene_fn) :
+        self._chrom_trees = defaultdict(KeyedBinaryTree)
+        refgene_f = RefGeneFile(refgene_fn)
+        genes = defaultdict(list)
+        for gene in refgene_f :
+            genes[gene['chrom']].append(gene)
+
+        # do stuff to ensure a balanced tree for each chromosome
+        for chrom,gene_list in genes.items() :
+            gene_list.sort(key=lambda x: int(x['txStart']))
+            first_half, second_half = gene_list[:len(gene_list)/2],gene_list[len(gene_list)/2:]
+            first_half.reverse()
+            for i in range(min(len(first_half),len(second_half))) :
+                to_add = first_half.pop(i)
+                self._chrom_trees[chrom].addNode(int(to_add['txStart']),to_add)
+
+
+class MACSFile(SmartFileIter) :
+    '''An iterable object containing the records in the supplied MACS peak file.
+    This class parses the comments found in the header of MACS peak files and
+    extracts metadata into the member dictionary **file_info**.  Here is an example
+    metadata dictionary::
+    
+      >>> f = MACSFile('macs_peaks.xls')
+      >>> f.file_info
+          {'ChIP-seq file': 'experiment_read_alignments.sam',
+           'MACS version': '1.4.0rc2 20110214',
+           'Range for calculating regional lambda': '1000 bps and 10000 bps',
+           'Redundant rate in control': 0.72999999999999998,
+           'Redundant rate in treatment': 0.080000000000000002,
+           'band width': 300,
+           'control file': 'control_read_alignments.sam',
+           'd': 203,
+           'effective genome size': 2110000000.0,
+           'format': 'SAM',
+           'maximum duplicate tags at the same position in control': 2,
+           'maximum duplicate tags at the same position in treatment': 2,
+           'model fold': '10,30',
+           'name': 'my_awesome_ChIP',
+           'pvalue cutoff': 1.0000000000000001e-05,
+           'tag size': 36,
+           'tags after filtering in control': 7879454,
+           'tags after filtering in treatment': 23927336,
+           'total tags in control': 29703098,
+           'total tags in treatment': 26092366}
+
+    The complete header can be found as a list in the **meta_data** member with
+    one comment per item.  The field names available are::
+
+        FIELD_NAMES = ['chr',
+                       'start',
+                       'end',
+                       'length',
+                       'summit',
+                       'tags',
+                       '-10*log10(pvalue)',
+                       'fold_enrichment',
+                       'FDR(%)',
+                      ]
+
+    '''
+    FIELD_NAMES = ['chr',
+                   'start',
+                   'end',
+                   'length',
+                   'summit',
+                   'tags',
+                   '-10*log10(pvalue)',
+                   'fold_enrichment',
+                   'FDR(%)',
+                  ]
+
+    FIELD_TYPES = [str,
+                   int,
+                   int,
+                   int,
+                   int,
+                   int,
+                   float,
+                   float,
+                   float
+                  ]
+
+    _METADATA_REGEXES = [
+            u'# This file is generated by (MACS version) (.*)',
+            u'# (name) = (.*)',
+            u'# (format) = (.*)',
+            u'# (ChIP-seq file) = (.*)',
+            u'# (control file) = (.*)',
+            u'# (effective genome size) = (.*)',
+            u'# (band width) = (\d+)',
+            u'# (model fold) = (.*)',
+            u'# (pvalue cutoff) = (.*)',
+            u'# (Range for calculating regional lambda) is: (.*)',
+            u'# (tag size) is determined as (\d+) bps',
+            u'# (total tags in treatment): (\d+)',
+            u'# (tags after filtering in treatment): (\d+)',
+            u'# (maximum duplicate tags at the same position in treatment) = (\d+)',
+            u'# (Redundant rate in treatment): (.*)',
+            u'# (total tags in control): (.*)',
+            u'# (tags after filtering in control): (.*)',
+            u'# (maximum duplicate tags at the same position in control) = (\d+)',
+            u'# (Redundant rate in control): (.*)',
+            u'# (d) = (\d+)'
+            ]
+
+    def __init__(self,macs_fn) :
+        self.meta_data = []
+        self.file_info = {}
+        if isinstance(macs_fn,str) :
+            f = open(macs_fn)
+        else :
+            f = macs_fn
+        done_with_header = False
+        while not done_with_header :
+            l = f.next().strip()
+            if l.startswith('#') :
+                for regex in MACSFile._METADATA_REGEXES :
+                    m = re.search(regex,l)
+                    if m is not None :
+                        self.file_info[m.group(1).strip()] = parse_number(m.group(2).strip())
+                self.meta_data.append(l)
+            elif l.startswith('\t'.join(MACSOutput.FIELD_NAMES[:5])) :
+                self.meta_data.append(l)
+                done_with_header = True
+
+        SmartFileIter.__init__(self,f)
+
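+# Iteration sketch, reusing the 'macs_peaks.xls' name from the docstring above:
+#   >>> macs = MACSFile('macs_peaks.xls')
+#   >>> for peak in macs :
+#   ...     if peak['fold_enrichment'] > 10 : print peak['chr'], peak['summit']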
+
+# for backwards compatibility, use MACSFile instead...?
+class MACSOutput(object) :
+    FIELD_NAMES = MACSFile.FIELD_NAMES
+
+GLOBAL_SETTINGS_FN = os.path.join(os.path.split(chipsequtil.__file__)[0],'org_settings.cfg')
+LOCAL_SETTINGS_FN = os.path.expanduser(os.path.join('~','.org_settings.cfg'))
+_ALL_SETTINGS, _LOCAL_SETTINGS, _GLOBAL_SETTINGS = range(3)
+
+def _get_org_settings(org_key=None,addnl_configs=[],src=_ALL_SETTINGS) :
+    """Utility function used by get_org_settings and get_all_settings, should \
+    not be called directly"""
+
+    config = ConfigParser()
+    conf_fns = []
+    if src in [_LOCAL_SETTINGS, _ALL_SETTINGS] :
+        conf_fns.append(LOCAL_SETTINGS_FN)
+    if src in [_GLOBAL_SETTINGS, _ALL_SETTINGS] :
+        conf_fns.append(GLOBAL_SETTINGS_FN)
+    config.read(conf_fns+addnl_configs)
+
+    d = {}
+    if org_key is None :
+        for sec in config.sections() :
+            # try to cast numeric-looking arguments into float, int
+            d[sec] = dict([(k,parse_number(v)) for k,v in config.items(sec)])
+    else :
+        d = dict([(k,parse_number(v)) for k,v in config.items(org_key)])
+
+    return d
+
+
+def get_org_settings(org_key,addnl_configs=[]) :
+    '''Returns a dict of setting/path values for a given organism as specified
+    in system-wide and user's settings. *org_key* is the organism name as found
+    in the config file, *e.g.* mm9.  *addnl_configs* are filenames of other
+    configuration files to add to the set of settings, usually not needed.
+    Example usage::
+    
+      >>> org_d = get_org_settings('mm9')
+      >>> org_d
+          {'affy_to_known_path': '/nfs/genomes/mouse_gp_jul_07/anno/knownToMOE43-mm9.txt',
+           'annotation_path': '/nfs/genomes/mouse_gp_jul_07/anno/refFlat-mm9.txt',
+           'description': "UCSC mm9 (July '07 build) with full TRANSFAC hypothesis set",
+           'genome': 'mm9',
+           'genome_dir': '/nfs/genomes/mouse_gp_jul_07',
+           'genome_size': 2107000000,
+           'known_gene_anno_path': '/nfs/genomes/mouse_gp_jul_07/anno/knownGene-mm9.txt',
+           'known_gene_xref_path': '/nfs/genomes/mouse_gp_jul_07/anno/kgXref-mm9.txt',
+           'refgene_anno_path': '/nfs/genomes/mouse_gp_jul_07/anno/refFlat-mm9.txt',
+           'theme_hypotheses': '/nfs/vendata/cwng/TRANSFAC/2010_transfac_vert_all_filtic9.tamo',
+           'theme_markov': '/nfs/data/cwng/chipseq/hypotheses/Mouse.markov',
+           'ucsc_chrom_sizes': '/nfs/genomes/mouse_gp_jul_07/mm9.chrom.sizes'}
+      >>> get_org_settings('mm9')['genome_dir']
+          '/nfs/genomes/mouse_gp_jul_07'
+
+    '''
+    return _get_org_settings(org_key,addnl_configs=addnl_configs)
+
+
+def get_all_settings(addnl_configs=[]) :
+    '''Returns a dict of setting/path values for every organism as specified in
+    system-wide and user's settings.'''
+    return _get_org_settings(None,addnl_configs=addnl_configs)
+
+
+def get_global_settings() :
+    '''Returns a dict of the global setting/path values installed with the
+    package.'''
+    return _get_org_settings(None,src=_GLOBAL_SETTINGS)
+
+
+def get_local_settings() :
+    '''Returns a dict of the current user's setting/path values taken from
+    ~/.org_settings.cfg if it exists.'''
+    return _get_org_settings(None,src=_LOCAL_SETTINGS)
+
+
+def check_org_settings(org_key,setting_list) :
+    '''Returns true if all setting names in *setting_list* are found in the 
+    org settings for organism *org_key* and false otherwise. Mostly used
+    internally to sanity check org settings.'''
+    settings = get_org_settings(org_key)
+    return all([s in settings.keys() for s in setting_list])
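+
+# Sketch: sanity-check required settings before a pipeline step ('mm9' and the
+# setting names mirror the get_org_settings example above):
+#   >>> if not check_org_settings('mm9',['genome_dir','ucsc_chrom_sizes']) :
+#   ...     sys.exit('missing required organism settings')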
+
+
+RC_MAP = string.maketrans('acgtACGT','tgcaTGCA')
+def reverse_complement(seq) :
+    """Reverse complements nucleotide string *seq*.  Leaves non-nucleotide characters uneffected."""
+    return seq.translate(RC_MAP)[::-1]
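+
+# e.g. (doctest-style; case is preserved, 'n' passes through):
+#   >>> reverse_complement('ACGTn')
+#   'nACGT'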
+
+
+def get_gc_content(seq) :
+    '''returns the GC content (fraction of C and G bases) of a DNA sequence given as a Python string'''
+    seq = seq.lower()
+    return (seq.count('c')+seq.count('g'))/float(len(seq))
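+
+# e.g. (doctest-style):
+#   >>> get_gc_content('ACGT')
+#   0.5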
+
+
+def get_gc_content_distribution(sequences,bins=100) :
+    '''returns a list approximating the GC content distribution of the
+    provided sequences.  The approximation is performed by binning.'''
+    gc_contents = [get_gc_content(s) for s in sequences]
+    gc_contents.sort()
+
+    # count up the sequences for each bin
+    bin_counts = [0.]*bins
+    for c in gc_contents :
+        # clamp a GC fraction of exactly 1.0 into the last bin
+        sample_bin = min(int(math.floor(c*bins)),bins-1)
+        bin_counts[sample_bin] += 1
+
+    # normalize bin counts
+    norm_bins = [x/len(sequences) for x in bin_counts]
+
+    # create a closure for this set of sequences
+    #def f(seq) :
+    #    gc = get_gc_content(seq)
+    #    return norm_bins[int(math.floor(gc*bins))]
+
+    return norm_bins
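+
+# Sketch: the returned bins form a discrete density over GC fractions
+# (sequences illustrative):
+#   >>> dist = get_gc_content_distribution(['ACGT','AATT'], bins=10)
+#   >>> sum(dist)
+#   1.0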
+
+
+def get_size_distribution(sequences) :
+    '''returns a generator of the lengths of the provided sequences'''
+    return (len(s) for s in sequences)
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/src/chipsequtil/motiftools.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,2064 @@
+"""
+There is a large number of functions and member functions here.  To get started,
+a motif can be instantiated by providing an ambiguity code, a set of aligned DNA
+sequences, or from matrices of counts, probabilities or log-likelihoods (aka PSSMs).
+
+>>> m = MotifTools.Motif_from_text('TGAAACANNSYWT')
+>>> print m.oneletter
+TGAAACA..sywT 
+
+Lower case reflects lower information content.  For a more detailed view of the distribution
+of information, try this::
+
+    >>> m.textlogo()
+    #                -- 2.30 bits
+    #
+    #  TGAAACA     T
+    #  TGAAACA     T
+    #  TGAAACA     T
+    #  TGAAACA     T
+    #  TGAAACA  CCAT
+    #  TGAAACA  CCAT
+    #  TGAAACA  GTTT
+    #  TGAAACA  GTTT -- 0.23 bits
+    #  -------------
+    #  TGAAACA..sywT
+
+
+Motif objects may be manipulated largely like text strings (with pythonic
+indexing)::
+
+    >>> print m[4:5].oneletter
+    A 
+    >>> print m[4:7].oneletter
+    ACA 
+    >>> print (m[4:7] + m[1:2]).oneletter
+    ACAG
+    >>> print (m[4:7] + m[1:7]).oneletter
+    ACAGAAACA
+
+and even padded with blanks::
+
+    >>> print  m[-4:7]
+    ...TGAAACA
+
+.. Copyright (2005) Whitehead Institute for Biomedical Research
+.. All Rights Reserved
+
+Author: David Benjamin Gordon
+
+Modified by: Adam Labadorf
+
+"""
+import copy
+import math
+import os
+import pickle
+import re
+import string
+import sys
+import tempfile
+
+pysum = sum
+
+from random import random,shuffle
+from subprocess import call
+
+from chipsequtil import reverse_complement
+
+class MotifToolsException(Exception) : pass
+
+one2two = {  'W':'AT',    'M':'AC',   'R':'AG',
+             'S':'CG',    'Y':'CT',   'K':'GT'}
+two2one = { 'AT': 'W',   'AC': 'M',  'AG': 'R',
+            'CG': 'S',   'CT': 'Y',  'GT': 'K'}
+revcomp = { 'A':'T',      'T':'A',    'C':'G',   'G':'C',
+            'W':'W',      'S':'S',    'K':'M',   'M':'K',
+            'Y':'R',      'R':'Y',    'N':'N',
+            'B':'N', 'D':'N', 'H':'N', 'V':'N', ' ':'N'}  #[12-11-02] Needs fixing
+
+ACGT = list('ACGT')
+YEAST_BG = {'A': 0.31, 'C': .19, 'G': .19, 'T': .31} #Yeast default background freqs
+
+revcomplement_memo = {'A':'T'}
+revcompTBL = string.maketrans("AGCTagctWSKMYRnN", "TCGAtcgaWSMKRYnN")
+def revcomplement(seq):
+    """A quick reverse-complement routine that memo-izes queries, understands
+    IUPAC ambiguity codes, and preserves case."""
+    global revcomplement_memo
+    try:
+        rc = revcomplement_memo[seq]
+    except KeyError:
+        #_t = map(lambda x,D=revcomp: D[x], seq)
+        #get = revcomp.get
+        #_t = map(get, seq)
+        _t = list(seq.translate(revcompTBL))
+        _t.reverse()
+        rc = ''.join(_t)
+        revcomplement_memo[seq] = rc
+        revcomplement_memo[rc]  = seq
+    return rc
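+
+# e.g. (doctest-style; note the preserved case):
+#   >>> revcomplement('AGGTCa')
+#   'tGACCT'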
+
+
+def Motif_from_ll(ll):
+    """Constructs a motif object from a log-likelihood matrix, which is in the
+    form of a list of dictionaries."""
+    m = Motif(None,None)
+    m.compute_from_ll(ll)
+    return m
+
+def Motif_from_counts(countmat,beta=0.01,bg={'A':.25,'C':.25,'G':.25,'T':.25}):
+    """
+    Construct a Motif object from a matrix of counts (or probabilities or frequencies).
+    A default set of uniform background frequencies may be overridden.
+
+    beta refers to the number of pseudocounts that should be distributed over each position
+    of the PSSM."""
+    m = Motif('',bg)
+    m.compute_from_counts(countmat,beta)
+    return m
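+
+# Sketch: a two-position motif from raw counts (counts are illustrative):
+#   >>> counts = [{'A':8,'C':0,'G':0,'T':0}, {'A':0,'C':4,'G':4,'T':0}]
+#   >>> m = Motif_from_counts(counts, beta=0.01)
+#   >>> m.width, m.oneletter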
+
+def Motif_from_text(text,beta=0.05,source='',bg=None):
+    """Construct a Motif object from a text string constructed from IUPAC
+    ambiguity codes. 
+
+    A default set of uniform background frequencies may be overridden with
+    a dictionary of the form {'A':.25,'C':.25,'G':.25,'T':.25}).
+
+    beta refers to the number of pseudocounts that should be distributed over each position
+    of the PSSM."""
+    if not bg: bg={'A':.25,'C':.25,'G':.25,'T':.25}
+    m = Motif('',bg)
+    m.compute_from_text(text,beta)
+    m.source = source
+    return m
+
+def copy(motif):
+    """Utility routine for copying motifs"""
+    # this function shadows the stdlib 'copy' module at module scope, so
+    # deepcopy is imported explicitly rather than accessed as copy.deepcopy
+    from copy import deepcopy
+    a = deepcopy(motif)
+    #a.__dict__ = motif.__dict__.copy()
+    return a
+
+class Motif:
+    """A pssm model, with scanning, storing, loading, and other operations. A
+    uniform nucleotide background is assumed if none is provided."""
+    def __init__(self,list_of_seqs_or_text=[],backgroundD=None):
+        self.MAP       = 0
+        self.evalue    = None
+        self.oneletter = ''
+        self.nseqs     = 0
+        self.counts    = []
+        self.width     = 0
+        self.fracs     = []
+        self.logP      = []
+        self.ll        = []
+        self.bits      = []
+        self.totalbits = 0
+        self.maxscore  = 0
+        self.minscore  = 0
+        self.pvalue      = 1
+        self.pvalue_rank = 1
+        self.church      = None
+        self.church_rank = 1
+        self.Cpvalue     = 1
+        self.Cpvalue_rank= 1
+        self.Cchurch     = 1
+        self.Cchurch_rank= 1
+        self.binomial    = None
+        self.binomial_rank=1
+        self.E_seq       = None
+        self.frac        = None
+        self.E_site      = None
+        self.E_chi2      = None
+        self.kellis      = None
+        self.MNCP        = None
+        self.ROC_auc     = None
+        self.realpvalue  = None
+        self.Cfrac       = None
+        self.CRA         = None
+        self.valid     = None
+        self.seeddist  = 0
+        self.seednum   = -1
+        self.seedtxt   = None
+        self.family    = None
+        self.source    = None
+        self.threshold = None
+        self._bestseqs = None
+        self.bgscale   = 1
+        self.best_pvalue = None
+        self.best_factor = None
+        self.gamma     = None
+        self.nbound    = 0
+        self.matchids  = []
+        self.overlap   = None
+        self.cumP      = []
+        self.numbound      = 0
+        self.nummotif      = 0
+        self.numboundmotif = 0
+        self.dataset = None
+        self.bgfile = None
+        self.cverror = None
+        self.beta = None
+        self.match_thresh = None
+        self.progscore = None
+        if backgroundD:
+            self.background = backgroundD
+        else:
+            #self.background = {'A': 0.31, 'C': .19, 'G': .19, 'T': .31} #Yeast Default
+            self.background = {'A':.25,'C':.25,'G':.25,'T':.25} # uniform background
+
+        if type(list_of_seqs_or_text) == type(''):
+            self.seqs = []
+            text = list_of_seqs_or_text
+            self.compute_from_text(text)
+        else:
+            self.seqs = list_of_seqs_or_text
+        if self.seqs:
+            self._parse_seqs(list_of_seqs_or_text)
+            self._compute_ll()
+            self._compute_oneletter()
+            #self._compute_threshold(2.0)
+
+    def __repr__(self):
+        return "%s (%d)"%(self.oneletter, self.nseqs)
+
+    def __str__(self):
+        return "%s (%d)"%(self.oneletter, self.nseqs)
+
+    def summary(self):
+        """return a text string one-line summary of motif and its metrics"""
+        m = self
+        txt = "%-34s (Bits: %5.2f  MAP: %7.2f   D: %5.3f  %3d)  E: %7.3f"%(
+            m, m.totalbits, m.MAP, m.seeddist, m.seednum, nlog10(m.pvalue))
+        if m.binomial!=None:  txt = txt + '  Bi: %6.2f'%(nlog10(m.binomial))
+        if m.church != None:  txt = txt + '  ch: %6.2f'%(nlog10(m.church))
+        if m.frac   != None:  txt = txt + '  f: %5.3f'%(m.frac)
+        if m.E_site != None:  txt = txt + '  Es: %6.2f'%(nlog10(m.E_site))
+        if m.E_seq  != None:  txt = txt + '  Eq: %6.2f'%(nlog10(m.E_seq))
+        if m.MNCP   != None:  txt = txt + '  mn: %6.2f'%(m.MNCP)
+        if m.ROC_auc!= None:  txt = txt + '  Ra: %6.4f'%(m.ROC_auc)
+        if m.E_chi2 != None:
+            if m.E_chi2 == 0: m.E_chi2=1e-20
+            txt = txt + ' x2: %5.2f'%(nlog10(m.E_chi2))
+        if m.CRA    != None:  txt = txt + '  cR: %6.4f'%(m.CRA)
+        if m.Cfrac  != None:  txt = txt + '  Cf: %5.3f'%(m.Cfrac)
+        if m.realpvalue != None: txt = txt + '  P: %6.4e'%(m.realpvalue)
+        if m.kellis != None:  txt = txt +  '  k: %6.2f'%(m.kellis)
+        if m.numbound      :  txt = txt +  '  b: %3d'%(m.numbound)
+        if m.nummotif      :  txt = txt +  '  nG: %3d'%(m.nummotif)
+        if m.numboundmotif :  txt = txt +  '  bn: %3d'%(m.numboundmotif)
+
+        return txt
+
+    def minimal_raw_seqs(self):
+        '''return minimal list of seqs that represent consensus '''
+        seqs = [[], []]
+        for letter in self.oneletter:
+            if one2two.has_key(letter):
+                seqs[0].append(one2two[letter][0])
+                seqs[1].append(one2two[letter][1])
+            else:
+                seqs[0].append(letter)
+                seqs[1].append(letter)
+        if ''.join(seqs[0]) == ''.join(seqs[1]):
+            return  [''.join(seqs[0])]
+        else:
+            return  [''.join(seqs[0]), ''.join(seqs[1])]
+
+    def _compute_oneletter(self):
+        """set the oneletter member variable"""
+        letters = []
+        for i in range(self.width):
+            downcase = None
+            if self.bits[i] < 0.25:
+                letters.append('.')
+                continue
+            if self.bits[i] < 1.0: downcase = 'True'
+            tups = [(self.ll[i][x],x) for x in ACGT if self.ll[i][x] > 0.0]
+            if not tups:  #Kludge if all values are negative (can this really happen?)
+                tups = [(self.ll[i][x],x) for x in ACGT]
+                tups.sort()
+                tups.reverse()
+                tups = [tups[0]]
+                downcase = 'True'
+            tups.sort()      #Rank by LL
+            tups.reverse()
+            bases = [x[1] for x in tups[0:2]]
+            bases.sort()
+            if len(bases) == 2: L = two2one[''.join(bases)]
+            else:               L = bases[0]
+            if downcase: L = L.lower()
+            letters.append(L)
+        self.oneletter = ''.join(letters)
+    def _parse_seqs(self, LOS):
+        """build a matrix of counts from a list of sequences"""
+        self.nseqs = len(LOS)
+        self.width = len(LOS[0])
+        for i in range(self.width):
+            Dc = {'A': 0, 'C': 0, 'T': 0, 'G': 0, 'N': 0}
+            for seq in LOS:
+                key = seq[i]
+                Dc[key] = Dc[key] + 1
+            del(Dc['N'])
+            self.counts.append(Dc)
+
+    def _compute_ll(self):
+        """compute the log-likelihood matrix from the count matrix"""
+        self.fracs = []
+        self.logP  = []
+        self.ll    = []
+        for i in range(self.width):
+
+            Dll  = {'A': 0, 'C': 0, 'T': 0, 'G': 0}
+            Df   = {'A': 0, 'C': 0, 'T': 0, 'G': 0}
+            DlogP= {'A': 0, 'C': 0, 'T': 0, 'G': 0}
+
+            for nuc in self.counts[i].keys():
+
+                #print i,nuc,self.counts[i][nuc],self.nseqs
+                # Dll[nuc] = log2( position nucleotide count/background sequence count )
+                # Dll[nuc] = log2( (count[nuc]+bgscale*bg[nuc])/(bg[nuc]*(num_seqs+bgscale)) )
+
+                pos_nuc_count = self.counts[i][nuc] + self.bgscale*self.background.get(nuc,0.)
+                adj_all_nuc_count = (self.nseqs + self.bgscale) * self.background.get(nuc,1e-10)
+
+                Dll[nuc] = math.log(pos_nuc_count/adj_all_nuc_count,2)
+
+                Pij = self.counts[i][nuc] / float(self.nseqs)
+                Df [nuc] = Pij
+                if Pij > 0:
+                    DlogP[nuc]  = math.log(Pij) / math.log(2.)
+                else:
+                    DlogP[nuc]  = -100  #Near zero
+
+            self.fracs.append(Df)
+            self.logP.append (DlogP)
+            self.ll.append   (Dll)
+        self.P = self.fracs
+        self._compute_bits()
+        self._compute_ambig_ll()
+        self._maxscore()
+
+
+    def compute_from_ll(self,ll):
+        """build motif from an inputed log-likelihood matrix
+
+        (This function reverse-calculates the probability matrix and background frequencies
+        that were used to construct the log-likelihood matrix)
+        """
+        self.ll    = ll
+        self.width = len(ll)
+        self._compute_bg_from_ll()
+        self._compute_logP_from_ll()
+        self._compute_ambig_ll()
+        self._compute_bits()
+        self._compute_oneletter()
+        self._maxscore()
+
+    def _computeP(self):
+        """compute the probability matrix (from the internal log-probability matrix)"""
+        P = []
+        for i in range(self.width):
+            #print i,
+            _p = {}
+            for L in ACGT: _p[L] = math.pow(2.0,self.logP[i][L])
+            P.append(_p)
+        #print
+        self.P = P
+
+    def _compute_bits(self):
+        """set m.totbits to the number of bits and m.bits to a list of bits at
+        each position"""
+        bits = []
+        totbits = 0
+        bgbits  = 0
+        bg      = self.background
+        UNCERT  = lambda x: x*math.log(x)/math.log(2.0)
+        for letter in ACGT:
+            bgbits = bgbits + UNCERT(bg[letter])
+        for i in range(self.width):
+            tot = 0
+            for letter in ACGT:
+                Pij = pow(2.0, self.logP[i][letter])
+                tot = tot + UNCERT(Pij)
+                #bit = Pij * self.ll[i][letter]
+                #if bit > 0:
+                #    tot = tot + bit
+            #print tot, bgbits, tot-bgbits
+            bits.append(max(0,tot-bgbits))
+            totbits = totbits + max(0,tot-bgbits)
+        self.bits = bits
+        self.totalbits = totbits
+
+        
+    def denoise(self,bitthresh=0.5):
+        """set low-information positions (below bitthresh) to Ns"""
+        for i in range(self.width):
+            tot = 0
+            for letter in ACGT:
+                if self.logP:
+                    Pij = pow(2.0, self.logP[i][letter])
+                else:
+                    Pij = pow(2.0, self.ll[i][letter]) * self.background[letter]
+                if Pij > 0.01:
+                    bit = Pij * self.ll[i][letter]
+                    tot = tot + bit
+            if tot < bitthresh:  #Zero Column
+                for letter in ACGT:
+                    self.ll[i][letter] = 0.0
+        self.compute_from_ll(self.ll)
+
+    def giflogo(self,id,title=None,scale=0.8,info_str=''):
+        """make a gif sequence logo"""
+        return giflogo(self,id,title,scale)
+
+    def printlogo(self,norm=2.3, height=10.0):
+        """print a text-rendering of the Motif Logo
+
+        norm
+            maximum number of bits to show
+        height
+            number of lines of text to use to render logo
+        """
+        self._print_bits(norm,height)
+    def print_textlogo(self,norm=2.3, height=8.0):
+        """print a text-rendering of the Motif Logo
+
+        norm
+            maximum number of bits to show
+        height
+            number of lines of text to use to render logo
+        """
+        self._print_bits(norm,height)
+    def _print_bits(self,norm=2.3, height=8.0):
+        """print a text-rendering of the Motif Logo
+
+        norm
+            maximum number of bits to show
+        height
+            number of lines of text to use to render logo
+        """
+        bits   = []
+        tots   = []
+        str    = []
+        for i in range(self.width):
+            D = {}
+            tot = 0
+            for letter in ['A', 'C', 'T', 'G']:
+                if self.logP:
+                    Pij = pow(2.0, self.logP[i][letter])
+                else:
+                    Pij = pow(2.0, self.ll[i][letter]) * self.background[letter]
+                if Pij > 0.01:
+                    '''Old'''
+                    D[letter] = Pij * self.ll[i][letter]
+                    #'''new'''
+                    #Q = self.background[letter]
+                    #D[letter] = ( Pij * math.log(Pij) - Pij * math.log(Q) ) / math.log(2.0)
+                    '''for both old and new'''
+                    tot = tot + D[letter]
+            bits.append(D)
+            tots.append(tot)
+        for i in range(self.width):
+            s = []
+            _l = bits[i].keys()
+            _l.sort(lambda x,y,D=bits[i]: cmp(D[y],D[x]))
+            for key in _l:
+                for j in range(int(bits[i][key] / norm * height)):
+                    s.append(key)
+            str.append(''.join(s))
+        fmt = '%%%ds'%height
+        print '#  %s'%('-'*self.width)
+        for h in range(int(height)):
+            sys.stdout.write("#  ")
+            for i in range(self.width):
+                sys.stdout.write((fmt%str[i])[h])
+            if h == 0:
+                sys.stdout.write(' -- %4.2f bits\n'%norm)
+            elif h == height-1:
+                sys.stdout.write(' -- %4.2f bits\n'%(norm/height))
+            else:
+                sys.stdout.write('\n')
+        print '#  %s'%('-'*self.width)
+        print '#  %s'%self.oneletter
+
+    def _compute_ambig_ll(self):
+        """extend log-likelihood matrix to include ambiguity codes
+        e.g.  What the score of a 'S'?  Here we use the max of C and G."""
+        for Dll in self.ll:
+            for L in one2two.keys():
+                Dll[L] = max(Dll[one2two[L][0]],  Dll[one2two[L][1]] )
+            Dll['N'] = 0.0
+            Dll['B'] = 0.0
+
+    def compute_from_nmer(self,nmer,beta=0.001):  #For backward compatibility
+        """See compute_from_text.  Kept for backward compatibility"""
+        self.compute_from_text(nmer,beta)
+
+    def compute_from_text(self,text,beta=0.001):
+        """compute a matrix values from a text string of ambiguity codes.
+        Use Motif_from_text utility instead to build motifs on the fly."""
+        prevlett = {'B':'A', 'D':'C', 'V':'T', 'H':'G'}
+        countmat = []
+        text = re.sub('[\.\-]','N',text.upper())
+        for i in range(len(text)):
+            D = {'A': 0, 'C': 0, 'T':0, 'G':0}
+            letter = text[i]
+            if letter in ['B', 'D', 'V', 'H']:  #B == no "A", etc...
+                _omit = prevlett[letter]
+                for L in ACGT:
+                    if L != _omit: D[L] = 0.3333
+            elif one2two.has_key(letter):  #Covers WSMYRK
+                for L in list(one2two[letter]):
+                    D[L] = 0.5
+            elif letter == 'N':
+                for L in D.keys():
+                    D[L] = self.background[L]
+            elif letter == '@':
+                for L in D.keys():
+                    D[L] = self.background[L]-(0.0001)
+                D['A'] = D['A'] + 0.0004
+            else:
+                D[letter] = 1.0
+            countmat.append(D)
+        self.compute_from_counts(countmat,beta)
+
+    def new_bg(self,bg):
+        """change the ACGT background frequencies to those in the supplied dictionary.
+        Recompute log-likelihood, etc. with new background.
+        """
+        counts = []
+        for pos in self.logP:
+            D = {}
+            for L,lp in pos.items():
+                D[L] = math.pow(2.0,lp)
+            counts.append(D)
+        self.background = bg
+        self.compute_from_counts(counts,0)
+
+    def addpseudocounts(self,beta=0):
+        """add pseudocounts uniformly across the matrix"""
+        self.compute_from_counts(self.counts,beta)
+
+    def compute_from_counts(self,countmat,beta=0):
+        """build a motif object from a matrix of letter counts."""
+        self.counts  = countmat
+        self.width   = len(countmat)
+        self.bgscale = 0
+
+        maxcount = 0
+        #Determine Biggest column
+        for col in countmat:
+            tot = pysum(col.values())
+            if tot > maxcount :
+                maxcount = tot
+
+        #Pad counts of remaining columns
+        for col in countmat:
+            tot = pysum(col.values())
+            pad = maxcount - tot
+            for L in col.keys():
+                col[L] = col[L] + pad * self.background.get(L,0.)
+
+        self.nseqs = maxcount
+        nseqs = maxcount
+
+        #Add pseudocounts
+        if beta > 0:  
+            multfactor = {}
+            bgprob = self.background
+            pcounts= {}
+            for L in bgprob.keys():
+                pcounts[L] = beta*bgprob[L]*nseqs 
+            for i in range(self.width):
+                for L in countmat[i].keys():
+                    _t = (countmat[i][L] + pcounts[L]) #Add pseudo
+                    _t = _t / (1.0 + beta)    #Renormalize
+                    countmat[i][L] = _t
+
+        #Build Motif
+        self.counts = countmat
+        self._compute_ll()
+        self._compute_oneletter()
+        self._maxscore()
+
+
+    def _compute_bg_from_ll(self):
+        """compute background model from log-likelihood matrix
+        by noting that:   pA  + pT  + pC  + pG  = 1
+                  and     bgA + bgT + bgC + bgG = 1
+                  and     bgA = bgT,   bgC = bgG
+                  and so  bgA = 0.5 - bgC
+                  and     pA  = lA * bgA,  etc for T, C, G
+                  so...
+                         (lA + lT)bgA + (lC + lG)bgC          =  1
+                         (lA + lT)bgA + (lC + lG)(0.5 - bgA)  =  1
+                         (lA + lT - lC - lG)bgA +(lC +lG)*0.5 =  1
+                          bgA                                 =  {1 - 0.5(lC + lG)} / (lA + lT - lC - lG)
+        + Gain accuracy by taking average of bgA over all positions of PSSM
+        """
+        
+        pow = math.pow
+        bgATtot = 0
+        nocount = 0
+        near0   = lambda x:(-0.01 < x and x < 0.01)
+        for i in range(self.width):
+            _D = self.ll[i]
+            ATtot = pow(2,_D['A']) + pow(2,_D['T'])
+            GCtot = pow(2,_D['C']) + pow(2,_D['G'])
+            if near0(_D['A']) and near0(_D['T']) and near0(_D['G']) and near0(_D['C']):
+                nocount = nocount + 1
+                continue
+            if near0(ATtot-GCtot):     #Kludge to deal with indeterminate case
+                nocount = nocount + 1
+                continue
+            bgAT   = (1.0 - 0.5*GCtot)/(ATtot - GCtot)
+            if (bgAT < 0.1) or (bgAT > 1.1):
+                nocount = nocount + 1
+                continue
+            bgATtot = bgATtot + bgAT
+        if nocount == self.width:  #Kludge to deal with different indeterminate case
+            self.background = {'A':0.25, 'C':0.25, 'G':0.25, 'T':0.25}
+            return
+        bgAT = bgATtot / (self.width - nocount)
+        bgGC = 0.5 - bgAT
+        self.background = {'A':bgAT, 'C':bgGC, 'G':bgGC, 'T':bgAT}            
+        
+    def _compute_logP_from_ll(self):
+        """compute self's logP matrix from the self.ll (log-likelihood)"""
+        log = math.log
+        logP = []
+        for i in range(self.width):
+            D = {}
+            for L in ACGT:
+                ''' if   ll = log(p/b) then
+                       2^ll = p/b
+                  and    ll = log(p) - log(b)
+                  so log(p) = ll + log(b)'''
+                #Pij = pow(2.0, self.ll[i][letter]) * self.background[letter]
+                D[L] = self.ll[i][L] + log(self.background[L])/log(2.)
+            logP.append(D)
+        self.logP = logP
+
+    def _print_ll(self):
+        """print log-likelihood (scoring) matrix"""
+        print "#  ",
+        for i in range(self.width):
+            print "  %4d   "%i,
+        print
+        for L in ['A', 'C', 'T', 'G']:
+            print "#%s "%L,
+            for i in range(self.width):
+                print  "%8.3f "%self.ll[i][L],
+            print
+    def _print_p(self):
+        """print probability (frequency) matrix"""
+        print "#  ",
+        for i in range(self.width):
+            print "  %4d   "%i,
+        print
+        for L in ['A', 'C', 'T', 'G']:
+            print "#%s "%L,
+            for i in range(self.width):
+                print  "%8.3f "%math.pow(2,self.logP[i][L]),
+            print
+    def _print_counts(self):
+        """print count matrix"""
+        print "#  ",
+        for i in range(self.width):
+            print "  %4d   "%i,
+        print
+        for L in ['A', 'C', 'T', 'G']:
+            print "#%s "%L,
+            for i in range(self.width):
+                print  "%8.3f "%self.counts[i][L],
+            print
+        
+    def _maxscore(self):
+        """sets self.maxscore and self.minscore"""
+        total = 0
+        lowtot= 0
+        for lli in self.ll:
+            total = total + max(lli.values())
+            lowtot= lowtot+ min(lli.values())
+        self.maxscore = total
+        self.minscore = lowtot
+
+    def _compute_threshold(self,z=2.0):
+        """for Motif objects assembled from a set of sequence,
+        compute a self.threshold with a z-score based on the distribution
+        of scores in among the original input sequences.
+        """
+        scoretally = []
+        for seq in self.seqs:
+            matches,endpoints,scores = self.scan(seq,-100)
+            scoretally.append(scores[0])
+        ave,std = avestd(scoretally)
+        self.threshold = ave - z *std
+        #print '#%s: threshold %5.2f = %5.2f - %4.1f * %5.2f'%(
+        #    self, self.threshold, ave, z, std)
+
+    def bestscanseq(self,seq):
+        """return score,sequence of the best match to the motif in the supplied sequence"""
+        matches,endpoints,scores = self.scan(seq,-100)
+        t = zip(scores,matches)
+        t.sort()
+        bestseq   = t[-1][1]
+        bestscore = t[-1][0]
+        return bestscore, bestseq
+    
+    def bestscore(self,seq):
+        """return the score of the best match to the motif in the supplied sequence"""
+        return self.bestscan(seq)
+
+    def bestscan(self,seq):
+        """return the score of the best match to the motif in the supplied sequence"""
+        matches,endpoints,scores = self.scan(seq,-100)
+        if not scores: return -100
+        scores.sort()
+        best = scores[-1]
+        return best
+
+    def matchstartorient(self,seq, factor=0.7):
+        """returns list of (start,orientation) coordinate pairs of matches to
+        the motif in the supplied sequence.  Factor is multiplied by m.maxscore
+        to get a match threshold.
+        """
+        ans = []
+        txts,endpoints,scores = self.scan(seq,factor=factor)
+        for txt, startstop in zip(txts,endpoints):
+            start, stop = startstop
+            rctxt  = reverse_complement(txt)
+            orient = (self.bestscore(txt,1) >= self.bestscore(rctxt,1))
+            ans.append((start,orient))
+        return ans
+
+    def scan(self, seq, threshold = '', factor=0.7):
+        """
+        Scan the sequence.  Returns three lists: matching sequences, endpoints,
+        and scores.  The value of 'factor' is multiplied by m.maxscore to get a
+        match threshold if none is supplied
+        """
+        if len(seq) < self.width:
+            return self._scan_smaller(seq,threshold)
+        else:
+            return self._scan(seq,threshold,factor=factor)
+
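+    # Usage sketch for scan() (sequence illustrative; scan expects uppercase):
+    #   >>> m = Motif_from_text('TGAAACA')
+    #   >>> matches, endpoints, scores = m.scan('CCTGAAACAGG')
+    #   >>> endpoints   # list of (start,stop) index pairs, one per hit
+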
+    def scansum(self,seq,threshold = -1000):
+        """
+        Sum of scores over every window in the sequence.  Returns
+        total, number of matches above threshold, average score, sum of exp(score)
+        """
+        ll = self.ll
+        sum = 0
+        width        = self.width
+        width_r      = range(width)
+        width_rcr    = range(width-1,-1,-1)
+        width_ranges = zip(width_r,width_rcr)
+        seqcomp      = seq.translate(revcompTBL)
+
+        total = 0
+        hits  = 0
+        etotal= 0
+        for offset in range(len(seq)-width+1):
+            total_f = 0
+            total_r = 0
+            for i,ir in width_ranges:
+                pos = offset+i
+                total_f = total_f + ll[i ][    seq[pos]]
+                total_r = total_r + ll[ir][seqcomp[pos]]
+            total_max = max(total_f,total_r)
+            if total_max >= threshold:
+                total = total + total_max
+                etotal = etotal + math.exp(total_max)
+                hits  = hits + 1
+        # compute the average once after the scan rather than on every window
+        if not hits:
+            ave = 0
+            etotal = 1.0  # no window scored above threshold; log(etotal) is 0 rather than a log(0) error
+        else:
+            ave = float(total)/float(hits)
+        return total,hits,ave,math.log(etotal)
+
+    def score(self, seq, fwd='Y'):
+        """returns the score of the first w-bases of the sequence, where w is the motif width."""
+        matches, endpoints, scores = self._scan(seq,threshold=-100000,forw_only=fwd)
+        return scores[0]
+
+    def bestscore(self,seq, fwd=''):
+        """returns the score of the best matching subsequence in seq."""
+        matches, endpoints, scores = self._scan(seq,threshold=-100000,forw_only=fwd)
+        if scores: return max(scores)
+        else:      return -1000
+
+    def _scan(self, seq,threshold='',forw_only='',factor=0.7):
+        """internal tility function for performing sequence scans"""
+        ll = self.ll #Shortcut for Log-likelihood matrix
+        if not threshold: threshold = factor * self.maxscore
+        
+        #print '%5.3f'%(threshold/self.maxscore)
+        matches       = []
+        endpoints     = []
+        scores        = []
+        width         = self.width
+        width_r       = range(width)
+        width_rcr     = range(width-1,-1,-1)
+        width_ranges  = zip(width_r,width_rcr)
+
+        seqcomp = seq.translate(revcompTBL)
+
+        for offset in range(len(seq)-self.width+1):    #Check if +/-1 needed
+            total_f = 0
+            total_r = 0
+            for i,ir in width_ranges:
+                pos = offset+i
+                total_f = total_f + ll[i ][    seq[pos]]
+                total_r = total_r + ll[ir][seqcomp[pos]]
+
+            if 0 and total_f > 1:
+                for i,ir in width_ranges:
+                    print seq[offset+i],'%6.3f'%ll[i ][        seq[offset+i] ],'   ',
+                print '= %7.3f'%total_f
+                
+            if 0:
+                print "\t\t%s vs %s: F=%6.2f R=%6.2f %6.2f %4.2f"%(seq[offset:offset+self.width],
+                                                                   self.oneletter,total_f,total_r,
+                                                                   self.maxscore,
+                                                                   max([total_f,total_r])/self.maxscore)
+            if total_f > threshold and ((total_f > total_r) or forw_only):
+                endpoints.append( (offset,offset+self.width-1) )
+                scores.append(total_f)
+                matches.append(seq[offset:offset+self.width])
+            elif total_r > threshold:
+                endpoints.append( (offset,offset+self.width-1) )
+                scores.append(total_r)
+                matches.append(seq[offset:offset+self.width])
+        return matches,endpoints,scores
+    def _scan_smaller(self, seq, threshold=''):
+        """internal utility function for performing sequence scans. The sequence
+        is smaller than the PSSM.  Are there good matches to regions of the PSSM?"""
+        ll = self.ll #Shortcut for Log-likelihood matrix
+        matches   = []
+        endpoints = []
+        scores    = []
+        w         = self.width
+        for offset in range(self.width-len(seq)+1):    #Check if +/-1 needed
+            maximum = 0
+            for i in range(len(seq)):
+                maximum = maximum + max(ll[i+offset].values())
+            if not threshold: threshold = 0.8 * maximum
+            total_f = 0
+            total_r = 0
+            for i in range(len(seq)):
+                total_f = total_f + ll[i+offset      ][        seq[i] ]
+                total_r = total_r + ll[w-(i+offset)-1][revcomp[seq[i]]]
+            if 0:
+                print "\t\t%s vs %s: F=%6.2f R=%6.2f %6.2f %4.2f"%(seq, self.oneletter[offset:offset+len(seq)],
+                                                                   total_f, total_r,  maximum,
+                                                                   max([total_f,total_r])/self.maxscore)
+            if total_f > threshold and total_f > total_r:
+                endpoints.append( (offset,offset+self.width-1) )
+                scores.append(total_f)
+                matches.append(seq[offset:offset+self.width])
+            elif total_r > threshold:
+                endpoints.append( (offset,offset+self.width-1) )
+                scores.append(total_r)
+                matches.append(seq[offset:offset+self.width])
+        return matches,endpoints,scores                
+
+    def mask_seq(self,seq):
+        """return a copy of input sequence in which any regions matching m are
+        replaced with strings of N's """
+        masked = ''
+        matches, endpoints, scores = self.scan(seq)
+        cursor = 0
+        for start, stop in endpoints:
+            masked = masked + seq[cursor:start] + 'N'*self.width
+            cursor = stop+1
+        masked = masked + seq[cursor:]
+        return masked
+
+    def masked_neighborhoods(self,seq,flanksize):
+        """chop up the input sequence into regions surrounding matches to m.
+        Replace the subsequences that match the motif with N's."""
+        ns = self.seq_neighborhoods(seq,flanksize)
+        return [self.mask_seq(n) for n in ns]
+
+    def seq_neighborhoods(self,seq,flanksize):
+        """chop up the input sequence into regions surrounding matches to the
+        motif."""
+        subseqs = []
+        matches, endpoints, scores = self.scan(seq)
+        laststart, laststop = -1, -1
+        for start, stop in endpoints:
+            curstart, curstop = max(0,start-flanksize), min(stop+flanksize,len(seq))
+            if curstart > laststop:
+                if laststop != -1:
+                    subseqs.append(seq[laststart:laststop])
+                laststart, laststop = curstart, curstop
+            else:
+                laststop = curstop
+        if endpoints: subseqs.append(seq[laststart:laststop])
+        return subseqs
+
+    def __sub__(self,other):
+        """Overloads the '-' operator to compute the Euclidean distance between
+        the probability matrices of motifs of equal width."""
+        if type(other) != type(self):
+            print "computing distance of unlike pssms (types %s, %s)"%(
+                type(other),type(self))
+            print 'First: %s'%other
+            print 'Self:  %s'%self
+            sys.exit(1)
+        if other.width != self.width:
+            print "computing distance of unlike pssms (width %d != %d)"%(
+                other.width,self.width)
+            sys.exit(1)
+        D = 0
+        FABS = math.fabs
+        POW  = math.pow
+        for L in self.logP[0].keys():
+            for i in range(self.width):
+                D = D + POW( POW(2,self.logP[i][L]) - POW(2,other.logP[i][L]), 2 )
+                #D = D + FABS( POW(2,self.logP[i][L]) - POW(2,other.logP[i][L]))
+                #D = D + FABS(self.logP[i][L] - other.logP[i][L])
+        return math.sqrt(D)
+
+    def maskdiff(self,other):
+        """a different kind of motif comparison metric.  See THEME paper for
+        details"""
+        return maskdiff(self,other)
+
+    def maxdiff(self):
+        """compute maximum possible Euclidean distance to another motif.  (For
+        normalizing?)"""
+        POW  = math.pow
+        D = 0
+        for i in range(self.width):
+            _min = 100
+            _max = -100
+            for L in ACGT:
+                val = POW(2,self.logP[i][L])
+                if val > _max:
+                    _max  = val
+                    _maxL = L
+                if val < _min:   #independent test: an elif here can leave _minL unset
+                    _min  = val
+                    _minL = L
+            for L in ACGT:
+                if L == _minL:
+                    delta = 1-POW(2,self.logP[i][L])           #1-val
+                    D = D + delta*delta
+                else:
+                    D = D + POW( POW(2,self.logP[i][L]), 2)    #0-val
+        return math.sqrt(D)
+                
+    def revcomp(self):
+        """return reverse complement of motif"""
+        return revcompmotif(self)
+    def trimmed(self,thresh=0.1):
+        """return motif with low-information flanks removed.  'thresh' is in bits."""
+        for start in range(0,self.width-1):
+            if self.bits[start]>=thresh: break
+        for stop  in range(self.width,1,-1):
+            if self.bits[stop-1]>=thresh: break
+        m = self[start,stop]
+        return m
+    def bestseqs(self,thresh=None):
+        """return all k-mers that match motif with a score >= thresh"""
+        if not thresh:
+            if self._bestseqs:
+                return self._bestseqs
+        if not thresh: thresh = 0.8 * self.maxscore
+        self._bestseqs = bestseqs(self,thresh)
+        return self._bestseqs
+    def emit(self,prob_min=0.0,prob_max=1.0):
+        """consider motif as a generative model, and have it emit a sequence"""
+        if not self.cumP:
+            for logcol in self.logP:
+                tups = []
+                for L in ACGT:
+                    p = math.pow(2,logcol[L])
+                    tups.append((p,L))
+                tups.sort()
+                cumu = []
+                tot  = 0
+                for p,L in tups:
+                    tot = tot + p
+                    cumu.append((tot,L))
+                self.cumP.append(cumu)
+        s = []
+        #u = random()+0.01 #Can make higher for more consistent motifs
+        for cumu in self.cumP:
+            u = (prob_max-prob_min)*random() + prob_min
+            #u = random()+0.01 #Can make higher for more consistent motifs
+            last = 0
+            for p,L in cumu:
+                if last < u and u <= p:
+                    letter = L
+                    break
+                else: last = p
+#           print L,'%8.4f'%u,cumu
+            s.append(L)
+        #print ''.join(s)
+        return ''.join(s)
+
+    def random_kmer(self):
+        """generate one of the many k-mers that matches the motif.  See m.emit()
+        for a more probabilistic generator"""
+        if not self._bestseqs: self._bestseqs = self.bestseqs()
+        seqs   = self._bestseqs
+        pos = int(random() * len(seqs))
+        print 'Random: ',self.oneletter,seqs[pos][1]
+        return seqs[pos][1]
+
+    def __getitem__(self,tup):
+        """
+        m.__getitem__(tup) -- Overload m[a,b] to submotif.  Less pythonic than [:], but more reliable
+        """
+        if len(tup) != 2:
+            print "Motif[i,j] requires two arguments, not ",tup
+        else:
+            beg, end = tup[0], tup[1]
+            return submotif(self,beg,end)
+    def __getslice__(self,beg,end):
+        """
+        m.__getslice__(beg,end) -- Overload m[a:b] to submotif.
+        """
+        if beg >= end:
+            #Probably python converted negative idx.  Undo
+            beg = beg - self.width
+        return submotif(self,beg,end)
+    def __add__(self,other):
+        """
+        m.__add__(other) -- Overload '+' for concatenating motifs
+        """
+        return merge(self,other,0)
+    def __len__(self):
+        """
+        m.__len__()  -- Overload len(m) to return width
+        """
+        return self.width
+    def shuffledP(self):
+        """
+        m.shuffledP() -- Generate motif in which probability matrix has been shuffled.
+        """
+        return shuffledP(self)
+    def copy(self):
+        """return a 'deep' copy of the motif"""
+        a = Motif()
+        a.__dict__ = self.__dict__.copy()
+        return a
+
+    def random_diff_avestd(self,iters=5000):
+        """see modules' random_diff_avestd"""
+        return random_diff_avestd(self,iters)
+    def bogus_kmers(self,count=200):
+        """Generate a faked multiple sequence alignment that will reproduce the
+        probability matrix."""
+
+        POW  = math.pow
+        #Build p-value inspired matrix
+        #Make totals cumulative:
+        # A: 0.1 C: 0.4 T:0.2 G:0.3
+        #                            ->  A:0.0 C:0.1 T:0.5 G:0.7  0.0
+        
+        #Take bg into account:
+        # We want to pick P' for each letter such that:
+        #     P'/0.25  = P/Q
+        # so  P'       = 0.25*P/Q
+        
+        m = []
+        for i in range(self.width):
+            _col = []
+            tot   = 0.0
+            for L in ACGT:
+                _col.append( tot )
+                tot = tot + POW(2,self.logP[i][L]) * 0.25 / self.background[L]
+            _col.append(tot)
+            #Renormalize
+            for idx in range(len(_col)):
+                _col[idx] = _col[idx] / _col[-1]
+            m.append(_col)
+
+        for p in range(0): #Was 5
+            for i in range(len(m)):
+                print '%6.4f  '%m[i][p],
+            print
+
+        seqs=[]
+        for seqnum in range(count+1):
+            f = float(seqnum)/(count+1)
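+            # f sweeps [0,1) in count+1 even steps; choosing the letter whose
+            # cumulative interval [m[i][j],m[i][j+1]) contains f reproduces each
+            # column's distribution across the emitted kmers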
+            s = []
+            for i in range(self.width):
+                for j in range(4):
+                    if (m[i][j] <= f and f < m[i][j+1]):
+                        s.append(ACGT[j])
+                        break
+            seqs.append(''.join(s))
+
+        del(seqs[0])
+        #for i in range(count):
+        #    print ">%3d\n%s"%(i,seqs[i])
+
+        return seqs
+
+
+def minwindowdiff(M1,M2,overlap=5,diffmethod='diff'):
+    #Alternate method: maskdiff, infomaskdiff
+    if type(M1) != type(M2):
+        print "Error: Attempted to compute alignment of objects that are not both Motifs"
+        print "       types %s: %s  and %s: %s"%(M1,type(M1),M2,type(M2))
+        sys.exit(1)
+
+    if M1.width <= M2.width: A = M1; Borig = M2
+    else:                    A = M2; Borig = M1
+    wA = A.width
+    wB = Borig.width
+    O  = overlap
+
+    if   diffmethod == 'diff':
+        diff_fcn = diff
+    elif diffmethod == 'maskdiff':
+        diff_fcn = maskdiff
+    elif diffmethod == 'infomaskdiff':
+        diff_fcn = infomaskdiff
+    else:
+        print "Error: unknown diffmethod '%s'"%diffmethod
+        sys.exit(1)
+        
+    mindiff = 1000
+    #print 'minwindodebug    wA ', wA, 'wB ', wB, 'O ', O, 'wA-0', wA-O, 'wB-O', wB-O
+    for Astart in range(wA-O+1):
+        subA = A[Astart:Astart+O]
+        for B in [Borig, Borig.revcomp()]:
+            for Bstart in range(wB-O+1):
+                subB = B[Bstart:Bstart+O]
+                mindiff = min(mindiff, diff_fcn(subA,subB))
+                #print 'minwindodebug     ',subA, subB, diff_fcn(subA,subB)
+    return mindiff
+    
+
+def minaligndiff(M1,M2,overlap=5,diffmethod='diff'):
+    #Alternate method: maskdiff, infomaskdiff
+    if type(M1) != type(M2):
+        print "Error: Attempted to compute alignment of objects that are not both Motifs"
+        print "       types %s: %s  and %s: %s"%(M1,type(M1),M2,type(M2))
+        sys.exit(1)
+
+    if M1.width <= M2.width:
+        A = M1; Borig = M2
+        switch = 0
+    else:
+        A = M2; Borig = M1
+        switch = 1
+    wA = A.width
+    wB = Borig.width
+    O  = overlap
+
+    '''
+    Here is the figure to imagine:
+       012345678901234567890   wA: 6  Bstart: 6-3     = 3
+         A         (A)         wB: 11 Bstop:  6+11-3-1= 13
+       ------     %%%%%%        O: 3  lastA:  6+11-3-3= 11
+          -----------
+          |O|  B
+    '''
+
+    if   diffmethod == 'diff':
+        diff_fcn = diff
+    elif diffmethod == 'maskdiff':
+        diff_fcn = maskdiff
+    elif diffmethod == 'infomaskdiff':
+        diff_fcn = infomaskdiff
+    else:
+        print "Error: unknown diffmethod '%s'"%diffmethod
+        sys.exit(1)
+    
+    Bstart = wA-O
+    Bstop  = wA+wB-O-1
+    lastA  = wA+wB-O-O
+    Dmin = 1000
+    Dmins=[]
+    #print A
+    #print '%s%s'%(' '*Bstart,Borig)
+    for B in [Borig, Borig.revcomp()]:
+        for start in range(0,lastA+1):
+            Bpos = []
+            Apos = []
+            for offset in range(wA):
+                abs = start+offset
+                if abs >= Bstart and abs <= Bstop:
+                    Apos.append(offset)
+                    Bpos.append(abs-Bstart)
+            subA = A[min(Apos),max(Apos)+1]
+            subB = B[min(Bpos),max(Bpos)+1]
+            #print '%s%s\n%s%s  %f'%(
+            #    ' '*start, subA,
+            #    ' '*start, subB,   diff_fcn(subA,subB))
+            if switch: _diff = diff_fcn(subB,subA)
+            else:      _diff = diff_fcn(subA,subB)
+            Dmin = min(Dmin, _diff)
+    return Dmin
+    
+'''
+To compare 2 motifs of the same width, there are these five functions:
+
+m1 - m2            - Euclidean Distance (sqrt(sum_col(sum_row)))
+diff(m1,m2)        - pseudo-Euclidean: sum_col(sqrt(norm(sum_row)))/#col
+maskdiff(m1,m2)    - diff, but excluding positions with "N" in m2
+infomaskdiff(m1,m2)- diff, but scaling distance by normalized
+     information content at each position in m2.
+diverge(m1,m2)     - Kullback-Leibler divergence sum[p log (p/q)]
+
+**Note that maskdiff, infomaskdiff, and diverge are not symmetric functions
+
+To compare 2 motifs of different widths, there are the functions:
+
+minwindowdiff(M1,M2,overlap=5,diffmethod='diff')
+minaligndiff(M1,M2,overlap=5,diffmethod='diff')
+
+minwindowdiff compares all width-'overlap' windows of the two motifs (both
+strands) and reports the minimum distance.  minaligndiff does a 'sliding'
+comparison of the two motifs and reports the minimum distance over all
+alignments.  overlap refers to the minimum overlap required while sliding.
+Below, overlap is '2'.  The default is '5'.
+
+      ------
+          -----------
+
+You can optionally specify the distance metric as a text string.
+The default is 'diff'.
+
+'''
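+# A minimal illustrative sketch (not executed): given two equal-width Motif
+# objects m1 and m2, e.g. from load('results.tamo'), the metrics above could
+# be invoked as:
+#
+#   d_euclid = m1 - m2                  # Euclidean distance
+#   d_diff   = diff(m1, m2)             # pseudo-Euclidean, per-column
+#   d_mask   = maskdiff(m1, m2)         # not symmetric in its arguments
+#   d_info   = infomaskdiff(m1, m2)     # information-weighted, used by THEME
+#   d_slide  = minaligndiff(m1, m2, overlap=5, diffmethod='infomaskdiff')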
+
+
+def diff(self,other):
+    """psuedo-Euclidean (sum_col(sqrt(norm(sum_row)))/#col"""
+    if type(other) != type(self):
+        print "computing distance of unlike pssms (types %s, %s)"%(
+            type(other),type(self))
+        print 'First: %s'%other
+        print 'Self:  %s'%self
+        sys.exit(1)
+    if other.width != self.width:
+        print "computing distance of unlike pssms (width %d != %d)"%(
+            other.width,self.width)
+        sys.exit(1)
+    POW     = math.pow
+    Dtot    = 0
+    for i in range(self.width):
+        '''Computes distance'''
+        D = 0
+        for L in ACGT:
+            D = D + POW( POW(2,self.logP[i][L]) - POW(2,other.logP[i][L]), 2 )
+        Dtot = Dtot + math.sqrt(D)/math.sqrt(2.0)
+    return Dtot/self.width
+    
+
+def maskdiff(self,other):
+    """diff, but excluding positions with 'N' in m2. Return pseudo-Euclidean
+    distance, but only include columns that are not background."""
+    if type(other) != type(self):
+        print "computing distance of unlike pssms (types %s, %s)"%(
+            type(other),type(self))
+        print 'First: %s'%other
+        print 'Self:  %s'%self
+        sys.exit(1)
+    if other.width != self.width:
+        print "computing distance of unlike pssms (width %d != %d)"%(
+            other.width,self.width)
+        sys.exit(1)
+
+    Dtot = 0
+    POW  = math.pow
+    NEAR0= lambda x:(-0.01 < x and x < 0.01)
+    divisor = 0
+    for i in range(self.width):
+        nearcount = 0
+
+        '''Implements mask'''
+        for L in ACGT:
+            diff = POW(2,other.logP[i][L]) - other.background[L]
+            if NEAR0(diff): nearcount = nearcount + 1
+        if nearcount == 4:
+            #print 'Skipping position %d :'%i,other.logP[i]
+            continue
+
+        '''Computes distance'''
+        divisor = divisor + 1
+        D = 0
+        for L in ACGT:
+            D = D + POW( POW(2,self.logP[i][L]) - POW(2,other.logP[i][L]), 2 )
+        Dtot = Dtot + math.sqrt(D)/math.sqrt(2.0)
+    return Dtot/divisor
+
+def infomaskdiff(self,other):
+    """Return pseudo-Euclidean distance, but scale column distance by
+    information content of "other".  Used by THEME"""
+    if type(other) != type(self):
+        print "computing distance of unlike pssms (types %s, %s)"%(
+            type(other),type(self))
+        print 'First: %s'%other
+        print 'Self:  %s'%self
+        sys.exit(1)
+    if other.width != self.width:
+        print "computing distance of unlike pssms (width %d != %d)"%(
+            other.width,self.width)
+        sys.exit(1)
+
+    maxbits = math.log( 1.0/min(other.background.values()) ) / math.log(2.0)
+    '''or... alternatively'''
+    #print maxbits, max(other.bits)
+    #print other.bits
+    maxbits = max(other.bits)
+    if maxbits < 0.1:  #'''There is nothing important here'''
+        return 1
+    
+    Dtot    = 0
+    POW     = math.pow
+    divisor = 0
+    '''Computes distance'''
+    for i in range(self.width):
+        D = 0
+        for L in ACGT:
+            D = D + POW( POW(2,self.logP[i][L]) - POW(2,other.logP[i][L]), 2 )
+        col_dist  = math.sqrt(D)/math.sqrt(2.0)
+        col_scale = other.bits[i]/maxbits
+        divisor = divisor + col_scale
+        Dtot = Dtot + col_dist*col_scale
+    return Dtot/divisor
+
+def diverge(self,other):
+    """Yet another distance metric"""
+    if type(other) != type(self):
+        print "computing distance of unlike pssms (types %s, %s)"%(
+            type(other),type(self))
+        print 'First: %s'%other
+        print 'Self:  %s'%self
+        sys.exit(1)
+    if other.width != self.width:
+        print "computing distance of unlike pssms (width %d != %d)"%(
+            other.width,self.width)
+        sys.exit(1)
+
+    Dtot = 0
+    POW  = math.pow
+    LOG2 = lambda x:math.log(x)/math.log(2.0)
+    NEAR0= lambda x:(-0.01 < x and x < 0.01)
+    divisor = 0
+    for i in range(self.width):
+        nearcount = 0
+
+        '''Implements mask'''
+        for L in ACGT:
+            diff = POW(2,other.logP[i][L]) - self.background[L]
+            if NEAR0(diff): nearcount = nearcount + 1
+        if nearcount == 4:
+            #print 'Skipping position %d :'%i,other.logP[i]
+            continue
+
+        '''Computes distance'''
+        divisor = divisor + 1
+        D = 0
+        for L in ACGT:
+            Pself = POW(2, self.logP[i][L])
+            Pother= POW(2,other.logP[i][L])
+            D = D + Pself * LOG2(Pself/Pother)
+        Dtot = Dtot + D
+    return Dtot/divisor
+
+
+
+def bestseqs(motif,thresh, seq='',score=0,depth=0,bestcomplete=None,SEQS=[]):
+    """This function returns a list of all sequences that a motif could
+    match match with a sum(log-odds) score greater than thresh."""
+    if depth == 0:
+        SEQS = []  #Must be a python 2.1 bug. I shouldn't have to do this
+    if not bestcomplete:
+        M = motif
+        maxs = []
+        for i in range(M.width):
+            bestj = 'A'
+            for j in ['C', 'G', 'T']:
+                if M.ll[i][j] > M.ll[i][bestj]:
+                    bestj = j
+            maxs.append(M.ll[i][bestj])
+        bestcomplete = []
+        for i in range(M.width):
+            tot = 0
+            for j in range(i,M.width):
+                tot = tot + maxs[j]
+            bestcomplete.append(tot)
+    if depth == motif.width:
+        if score > thresh:
+            SEQS.append((score,seq))
+        #if len(SEQS) > 2000:
+        #    thresh = 1000.0 # Return Early, You don't really want all these sequences, do you?
+        return
+    if depth==-1:
+        print '# %-10s %6.3f %6.3f %2d'%(seq, score, bestcomplete[depth], depth)
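+    # prune: bestcomplete[depth] is the best score attainable from the remaining
+    # columns, so if even that cannot reach thresh this branch is abandoned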
+    if score + bestcomplete[depth] < thresh: return
+    #if depth > 0 and len(SEQS) > 2000:
+    #    return
+    for L in ACGT:
+        newseq   = seq + L
+        newscore = score + motif.ll[depth][L]
+        bestseqs(motif,thresh,newseq,newscore,depth+1,bestcomplete,SEQS)
+    if depth == 0:
+        SEQS.sort()
+        SEQS.reverse()
+        return SEQS
+
+def seqs2fasta(seqs,fasta_file = ''):
+    """
+    seqs2fasta(seqs,fasta_file = '') -- Dumps a Fasta formatted file of sequences,
+    keyed by their position in the input list::
+
+      >0
+      ACTTTTTGTCCCA
+      >1
+      ACTTTTGGGGCCA
+        ...
+
+    """
+    if not fasta_file:
+        fasta_file = tempfile.mktemp()
+    FH = open(fasta_file,'w')
+    for i in range(len(seqs)):
+        FH.write(">%d\n%s\n"%(i,seqs[i]))
+    FH.close()
+    return fasta_file
+
+def top_nmers(N,seqs,with_counts = 0,purge_Ns = ''):
+    """Assemble list of all nmers (kmers) with width 'N' from supplied sequences.
+    Option with_counts returns list of (kmer, count) tuples instead.  Purge N's
+    ignores kmers containing N's.  """
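+    # e.g. top_nmers(3,['ACGTT']) returns ['ACG','AAC']: CGT is counted as its
+    # reverse complement ACG, and GTT is keyed by its reverse complement AAC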
+    Nmers = {}
+    revcompTBL = string.maketrans("AGCTagctnN", "TCGAtcganN")
+    for seq in seqs:
+        for i in range(len(seq)-N+1):
+            Nmer = seq[i:i+N]
+            if purge_Ns:
+                if Nmer.find('N') >= 0: continue
+            _t = list(Nmer.translate(revcompTBL))
+            _t.reverse()
+            NmerRC = ''.join(_t)   # _t used until here to reverse complement seq
+            _t = [Nmer, NmerRC]
+            _t.sort()
+            NmerKey = _t[0]        # _t used until here to get alphabetically first seq
+            if Nmers.has_key(NmerKey):
+                Nmers[NmerKey] = Nmers[NmerKey] + 1
+            else:
+                Nmers[NmerKey] = 1
+    sorted = Nmers.keys()
+    sorted.sort(lambda x,y,D=Nmers:cmp(D[y],D[x]) or cmp(x,y))
+    #for i in range(10):
+    #    print "# %2d  %s %d"%(i,sorted[i],Nmers[sorted[i]])
+    if with_counts:
+        return zip(sorted,map(lambda x,N=Nmers:N[x], sorted))
+    else:
+        return sorted
+
+def m_matches(seqs,wmer,m):
+    """Returns list of all kmers among sequences that have at most
+    m mismatches to the supplied wmer (kmer)."""
+    matches = []
+    width = len(wmer)
+    for (nmer, count) in top_nmers(width,seqs,'with counts'):
+        match = 0
+        for i in range(width):
+            if nmer[i] == wmer[i]:
+                match = match+1
+        if match >= m:
+            for i in range(count):
+                matches.append(nmer)
+    return matches
+
+def compare_seqs(s1, s2):
+    """
+    compare_seqs(s1, s2) -- Slide the shorter sequence along the longer one (both
+    strands) and return (best identity fraction, shorter seq, best-matching window).
+    """
+    if len(s1) > len(s2):
+        long  = s1
+        short = s2
+    else:
+        long  = s2
+        short = s1
+    (maxcount,max_i) = (0,0)
+    for i in range(len(long)-len(short)+1):
+        idcount_f = 0
+        idcount_r = 0
+        for j in range(len(short)):
+            if short[j] == long[i+j]:
+                idcount_f = idcount_f + 1
+            if short[-(j+1)] == revcomp[long[i+j]]:
+                idcount_r = idcount_r + 1
+        if (idcount_f > maxcount and idcount_f >= idcount_r):
+            maxcount = idcount_f
+            max_i    = i
+        elif (idcount_r > maxcount):
+            maxcount = idcount_r
+            max_i    = i
+        #print i,j,idcount_f,idcount_r,maxcount
+    maxfrac = float(maxcount) / len(short)
+    print maxfrac,maxcount,len(short)
+    return maxfrac,short,long[max_i:max_i+len(short)]
+
+def shuffle_bases(m):
+    """return a new motif object in which the probabilities are randomly
+    re-assigned to different letters at the same position."""
+    C = []
+    letts = list('ACGT')
+    for i in range(m.width):
+        D = {}
+        vals = m.counts[i].values()
+        shuffle(vals)
+        for j in range(4):
+            D[letts[j]] = vals[j]
+        C.append(D)
+    n = Motif()
+    #n.__dict__ = m.__dict__.copy() #May copy too much information (cached diff information, etc...)
+    n.compute_from_counts(C)
+    return n
+
+def random_diff_avestd(motif,iters=5000):
+    """Return the average & stddev distance ('diff') between a
+    motif and "iters" random motifs of the same width."""
+    w = motif.width
+    vals = []
+    for i in range(iters):
+        vals.append(motif - random_motif(w))
+    return avestd(vals)
+
+def random_motif(w):
+    """Generate a random motif of width w.  Each position will have a dominant
+    letter with probability around 0.91."""
+    C = []
+    for i in range(w):
+        D = {}
+        tot = 0
+        p = int(random.random() * 4)
+        Lup = ACGT[p]
+        for L in ACGT:
+            D[L] = 0.1
+            tot = tot + 0.001
+        D[Lup] = D[Lup] + 1
+        for L in ACGT:
+            D[L] = D[L]/tot
+        C.append(D)
+    m = Motif()
+    m.compute_from_counts(C)
+    return m
+
+def toDict(M):
+    '''
+    toDict(M) -- Convert a 2D array to a list of dictionaries (which is how the motif object
+                 stores information internally).  Assumes M entries are in alphabetical order (ACGT)
+    '''
+    if type(M[0]) == type(0.0):
+        return toDictVect(M)
+    else:
+        a = []
+        for i in range(len(M)):
+            a.append(toDictVect(M[i]))
+        return a
+        
+def toDictVect(V):
+    """
+    toDictVect(V) -- Convert a 1D vector to a dictionary of DNA letters.  Assumes values
+    in V are in alphabetical order (ACGT).
+    """
+    D = {}
+    for L,i in (('A',0), ('C',1), ('G',2), ('T',3)):
+        D[L]=V[i]
+    return D
+
+def submotif(self,beg,end):
+    """**Deprecated** Use slice functionality (m[2:4]) instead.
+    
+    Utility function
+    for extracting sub-motifs and padding motifs."""
+    bg = self.background.copy()
+    P = []
+
+    #Determine if any background ('zero-information') columns should be added at the beginning
+    #because the user has specified a negative beg index
+    for i in range(beg,0):
+        P.append(bg.copy())
+
+    #Copy relevant content of motif
+    start = max(beg,0)
+    stop  = min(end,self.width)
+    for i in range(start,stop):
+        D = {}
+        for L in ACGT:
+            D[L] = math.pow(2.,self.logP[i][L])
+        P.append(D)
+
+    #Determine if any 'zeros' should be added at the end
+    #because the user has specified a width too large
+    for i in range(self.width,end):
+        P.append(bg.copy())
+
+    #print "BEG, END", beg,end
+    #for i in range(beg,end):
+    #    print i,P[i]
+
+    #Build the Motif
+    M = copy.deepcopy(self)
+    #M = Motif(None,bg.copy())
+    M.compute_from_counts(P)
+    M.source = self.source
+    return M
+                
+def shuffledP(self):
+    """Construct a motif in which the letter distributions are preserved but
+    are reassigned to rondom positions in the motif."""
+    bg = self.background.copy()
+    P = []
+
+    #Copy relevant content of motif
+    for i in range(0,self.width):
+        D = {}
+        _s = ACGT[:]
+        shuffle(_s)
+        for L,_L in zip(ACGT,_s):
+            D[L] = math.pow(2.,self.logP[i][_L])
+        P.append(D)
+
+    #Build the Motif
+    M = copy.deepcopy(self)
+    #M = Motif(None,bg.copy())
+    M.compute_from_counts(P)
+    M.source = self.source
+    return M
+
+def revcompmotif(self):
+    """Construct the reverse complement of the motif.  Use m.revcomp() member
+    function instead."""
+    bg = self.background.copy()
+    P = []
+
+    for i in range(self.width):
+        D = {}
+        for L in ACGT:
+            D[L] = math.pow(2.,self.logP[self.width-i-1][revcomp[L]])
+        P.append(D)
+
+    #Build the Motif
+    M = copy.deepcopy(self)
+    M.compute_from_counts(P)
+    return M
+        
+
+def sum(motifs,weights=[]):
+    """Perhaps better called 'average'.  Constructs a motif by averaging the
+    probabilities at each position of the (pre-aligned) input motifs.  Optional
+    weights can be assigned, and must be in the same order as the motifs. 
+    """
+    if not weights:
+        weights = [1.0] * len(motifs)
+    tot = 0.0
+    for w in weights: tot=tot+float(w)
+    weights = [(w/tot) for w in weights]
+    C = []
+    for c in motifs[0].fracs:
+        D = {}
+        for L in ACGT: D[L] = 0.0
+        C.append(D)
+    for m,w in zip(motifs,weights):
+        for i in range(m.width):
+            for L in ACGT:
+                C[i][L] = C[i][L] + m.fracs[i][L]*w
+    motif = Motif_from_counts(C,0.0,bg=motifs[0].background)
+    return motif.trimmed()
+
+
+def giflogo(motif,id,title=None,scale=0.8):
+    """Interface to the 'weblogo/seqlogo' perl
+    scripts that generate colorful sequence logos
+    """
+    return seqlogo(motif,id,title,scale,img_format='GIF')
+
+
+seqlogo_formats = ('GIF','PDF','EPS','PNG')
+illegal_fn_chars = '&;/ ()'
+fn_trans = string.maketrans(illegal_fn_chars,'_'*len(illegal_fn_chars))
+def seqlogo(motif,motif_id,title=None,scale=0.8,img_format='GIF') :
+    """Interface to the'weblogo/seqlogo' perl scripts that generate colorful
+    sequence logos.  Available formats are %s.  Replaces illegal filename
+    characters in *id* parameter (i.e. '%s') with underscores when writing
+    to file.  The executable *seqlogo* must be on your path.
+    """%(seqlogo_formats,illegal_fn_chars)
+    #SEQLOGO = TAMOpaths.weblogodir + 'seqlogo'
+    #TAMOpaths.CHECK(SEQLOGO,'','Weblogo/Seqlogo')
+    kmers   = motif.bogus_kmers(100)
+    width   = float(len(kmers[0]) )
+    height  = float(4)
+    m       = motif
+    width, height = width*scale, height*scale
+    tmp     = tempfile.mktemp() + '.fsa'
+    if title is None:
+        title = motif_id
+
+    if img_format.upper() not in seqlogo_formats :
+        raise MotifToolsException('seqlogo requires one of %s'%seqlogo_formats)
+
+    seqs2fasta(kmers,tmp)
+    fn = motif_id.translate(fn_trans)
+    cmd = 'seqlogo -F %s -acpY -w%d -h%d -k 1 -M -f %s -o %s -t "%s" '%(
+          img_format.upper(), width, height, tmp, fn, title)
+
+    call(cmd,shell=True)
+    return "%s.%s"%(fn,img_format.lower())
+
+
+def merge(A,B,overlap=0):
+    """**Deprecated** Use the '+' operator instead.
+    
+    Used for concatenating motifs into a new motif, allowing for the averaging
+    of overlapping bases between them.
+    """
+    if (overlap < 0 or overlap > A.width or overlap > B.width):
+        print 'Cannot overlap %s with %s by %d bases'%(A.oneletter,B.oneletter,overlap)
+        return None
+
+    #Build Probability matrix.  Width will be A.width + B.width - overlap
+    w = A.width + B.width - overlap
+
+    P = []
+    #Make a copy of A's probabilities into P
+    for i in range(A.width):
+        D = {}
+        logP = A.logP[i]
+        for L in logP.keys():
+            D[L] = math.pow(2,logP[L])
+        P.append(D)
+    #Add B's first 'overlap' probabilities to last 'overlap' probabilities of P
+    for i in range(overlap):
+        logP = B.logP[i]
+        Pidx = len(P)-overlap+i
+        _tot = 0
+        for L in logP.keys():
+            P[Pidx][L] = (P[Pidx][L] + math.pow(2,logP[L])) / 2.0
+            P[Pidx][L] = max(P[Pidx][L],math.pow(2,logP[L]))
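+            #NB: this max() overwrites the average whenever B's column probability
+            #exceeds it; the renormalization below restores a proper distribution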
+            _tot = _tot + P[Pidx][L]
+        for L in logP.keys():
+            P[Pidx][L] = P[Pidx][L] / _tot
+    #Append B's remaining probabilities to P
+    for i in range(overlap,B.width):
+        D = {}
+        logP = B.logP[i]
+        for L in logP.keys():
+            D[L] = math.pow(2,logP[L])
+        P.append(D)
+        
+    #Build a motif
+    M = Motif(None,A.background.copy())
+    M.source = A.source,B.source
+    M.compute_from_counts(P)
+    return M
+
+def avestd(vals):
+    """return an (average, stddev) tuple computed from the supplied list of values"""
+    (sum, sum2) = (0.,0.)
+    N = float(len(vals))
+    for val in vals:
+        sum  = sum  + float(val)
+        sum2 = sum2 + float(val)*float(val)
+    if N == 1:
+        ave = sum
+        std = 0
+    else:
+        ave = sum /  N
+        std = math.sqrt( (sum2-(N*ave*ave)) / (N-1.0) )
+    return ave,std
+
+
+def load(filename):
+    """load a 'TAMO'-formatted motif file"""
+    FID = open(filename,'r')
+    lines = FID.readlines()
+    FID.close()
+    motifs   = []
+    seedD    = {}
+    seedfile = ''
+    for i in range(len(lines)):
+        if lines[i][0:10] == 'Log-odds matrix'[0:10]:
+            w = len(lines[i+1].split())-1
+            ll = []
+            for pos in range(w):
+                ll.append({})
+            for j in range(0,4):
+                toks = lines[i+j+2].split()
+                L = toks[0][1]
+                for pos in range(w):
+                    ll[pos][L] = float(toks[pos+1])
+            m = Motif_from_ll(ll)
+            motifs.append(m)
+        if lines[i][0:6] == 'Motif '[0:6]:
+            toks =  lines[i].split()
+            motifs[-1].nseqs    = float(re.sub('[\(\)]','',toks[3]))
+            motifs[-1].totalbits= float(toks[5])
+            motifs[-1].MAP      = float(toks[7])
+            motifs[-1].seeddist = float(toks[9])
+            motifs[-1].seednum  = int(toks[10][0:-1])
+            motifs[-1].pvalue   = math.pow(10,-float(toks[12]))
+
+            if 'ch:' in toks:
+                _idx = toks.index('ch:')
+                motifs[-1].church = math.pow(10,-float(toks[_idx+1]))
+            if 'Es:' in toks:
+                _idx = toks.index('Es:')
+                motifs[-1].E_site = math.pow(10,-float(toks[_idx+1]))
+            if 'x2:' in toks:
+                _idx = toks.index('x2:')
+                motifs[-1].E_chi2 = math.pow(10,-float(toks[_idx+1]))
+            if 'Eq:' in toks:
+                _idx = toks.index('Eq:')
+                motifs[-1].E_seq = math.pow(10,-float(toks[_idx+1]))
+            if 'mn:' in toks:
+                _idx = toks.index('mn:')
+                motifs[-1].MNCP = float(toks[_idx+1])
+            if 'f:' in toks:
+                _idx = toks.index('f:')
+                motifs[-1].frac = float(toks[_idx+1])
+            if 'Ra:' in toks:
+                _idx = toks.index('Ra:')
+                motifs[-1].ROC_auc = float(toks[_idx+1])
+            if 'cR:' in toks:
+                _idx = toks.index('cR:')
+                motifs[-1].CRA     = float(toks[_idx+1])
+            if 'Cf:' in toks:
+                _idx = toks.index('Cf:')
+                motifs[-1].Cfrac   = float(toks[_idx+1])
+            if 'k:' in toks:
+                _idx = toks.index('k:')
+                motifs[-1].kellis  = float(toks[_idx+1])
+
+            if 'b:' in toks:
+                _idx = toks.index('b:')
+                motifs[-1].numbound = int(toks[_idx+1])
+            if 'nG:' in toks:
+                _idx = toks.index('nG:')
+                motifs[-1].nummotif = int(toks[_idx+1])
+            if 'bn:' in toks:
+                _idx = toks.index('bn:')
+                motifs[-1].numboundmotif = int(toks[_idx+1])
+
+
+
+        if lines[i][0:10] == 'Threshold: '[0:10]:
+            toks =  lines[i].split()
+            motifs[-1].threshold= float(toks[1])
+        if lines[i][0:5] == 'Seed '[0:5]:
+            toks = lines[i].split()
+            id = int(toks[1][0:-1])  #'10:' -> '10'
+            seedD[id] = toks[2]
+        if lines[i][0:7] == 'Source: '[0:7]:
+            motifs[-1].source = lines[i][7:].strip()
+        if lines[i][0:6] == 'Gamma: '[0:6]:
+            motifs[-1].gamma = float(lines[i][6:])
+        if lines[i][0:6] == 'Evalue: '[0:6]:
+            motifs[-1].evalue = float(lines[i][7:].strip())
+        if lines[i][0:22]=='Program specific score: '[0:22]:
+            tempprogscore=lines[i][23:].split(":");
+
+            for j in range(len(tempprogscore)):
+                tempprogscore[j]=tempprogscore[j].strip()
+
+            if len(tempprogscore)>1:
+                try:
+                    tempprogscore[1]=float(tempprogscore[1])
+                except ValueError:
+                    tempprogscore[1]=tempprogscore[1]
+                motifs[-1].progscore=tempprogscore
+
+        if lines[i][0:10] == 'fasta file:'[0:10]:
+            parts=lines[i].strip().split()
+            motifs[-1].dataset, motifs[-1].beta, motifs[-1].bgfile = \
+                        parts[2],float(parts[4]), parts[7]
+
+        if lines[i][0:21]=='classification error: '[0:21]:
+            motifs[-1].cverror=float(lines[i][22:].strip())
+        if lines[i][0:20]=='SVM match threshold: '[0:20]:
+            motifs[-1].match_thresh=float(lines[i][21:].strip())
+        if lines[i].find('Using')>=0 and lines[i].find('as seeds')>=0:
+            '''#Using all (132) motifs in SLT_081503.seeds as seeds:'''
+            seedfile = lines[i].split()[-3]
+    for i in range(len(motifs)):
+        if seedfile: motifs[i].seedfile = seedfile
+        seednum = motifs[i].seednum
+        if seedD.has_key(seednum):
+            motifs[i].seedtxt = seedD[seednum]
+    return motifs
+    
+def save_motifs(motifs,filename,kmer_count=20):
+    """Save list of motifs as a 'TAMO'-formatted motif file to the specificied file.
+    optional kmer_count specificies how many sequences to include in the printed
+    multiple sequence alignment that recapitulates the probability matrix."""
+    try :
+        print_motifs(motifs,kmer_count,f=filename)
+    except:
+        print '!-- Error saving motifs to %s'%filename
+        raise
+    
+def print_motif(motif,kmer_count=20,istart=0,f=None):
+    """Print a motif in the 'TAMO'-format.  istart specificies the motif number, and 
+    optional kmer_count specificies how many sequences to include in the printed
+    multiple sequence alignment that recapitulates the probability matrix. """
+    print_motifs([motif],kmer_count,istart)
+    sys.stdout.flush()
+
+def print_motifs(motifs,kmer_count=20,istart=0,f=None):
+    """Print list of motifs as a 'TAMO'-formatted motif file to the specificied file.
+    Optional kmer_count specificies how many sequences to include in the printed
+    multiple sequence alignment that recapitulates the probability matrix.
+    istart specifies number from which to begin motif ids."""
+
+    # handle f input cases
+    if f is None :
+        f = sys.stdout
+    elif isinstance(f,str) :
+        f = open(f,'w')
+
+    i = istart-1
+    for m in motifs:
+        i = i + 1
+        print >>f,  "Log-odds matrix for Motif %3d %s"%(i,m)
+        m._print >>f, _ll()
+        #print >>f,  "Probability matrix for Motif %3d %s"%(i,m)
+        #m._print >>f, _p()
+        print >>f,  "Sequence Logo"
+        m._print >>f, _bits()
+        for newprop in ('gamma', 'church', 'E_site', 'E_seq', 'E_chi2', 'realpvalue',
+                        'kellis', 'MNCP', 'ROC_auc', 'CRA', 'Cfrac', 'frac', 'binomial'):
+            if not m.__dict__.has_key(newprop):   #Kludge to deal w/ old shelves
+                m.__dict__[newprop] = None
+        if m.seedtxt:  print >>f,  "Seed: %3d %s"%(i,m.seedtxt)
+        if m.gamma:    print >>f,  "Gamma: %7.5f"%m.gamma
+        if m.evalue != None: print >>f,  'Evalue: %6.3e'%m.evalue
+        if m.progscore is not None :
+            printableProgscore=(m.progscore[0],str(m.progscore[1]))
+            print >>f,  'Program specific score: '+ ": ".join(printableProgscore)
+
+        if m.family:   print >>f,  "Family: ",m.family
+        if m.source:   print >>f,  "Source: ",m.source
+        if m.dataset:  print >>f,  "fasta file: %s beta: %f background sequences: %s"%(m.dataset,m.beta,m.bgfile)
+        if m.match_thresh: print >>f,  "SVM match threshold: ",m.match_thresh
+        if m.cverror:  print >>f,  "classification error: ",m.cverror
+        #Motif   0 NGAGGGGGNN (0)            (Bits:   8.24   MAP:   6.53   D:  0.21  0)  Enr: 54.000 
+        print >>f,  "Motif %3d %-25s (Bits: %5.2f  MAP: %5.2f   D: %5.3f  %2d) E: %6.3f"%(
+            i, m, m.totalbits, m.MAP, m.seeddist, m.seednum, nlog10(m.pvalue)),
+        if m.binomial!=None:  print >>f,  ' Bi: %5.2f'%nlog10(m.binomial),
+        if m.church != None:  print >>f,  ' ch: %5.2f'%nlog10(m.church),
+        if m.frac   != None:  print >>f,  ' f: %5.2f'%(m.frac),
+        if m.E_site != None:  print >>f,  ' Es: %5.2f'%nlog10(m.E_site),
+        if m.E_seq != None:  print >>f,  ' Eq: %5.2f'%(nlog10(m.E_seq)),
+        if m.MNCP   != None:  print >>f,  ' mn: %5.2f'%(m.MNCP),
+        if m.ROC_auc!= None:  print >>f,  ' Ra: %6.4f'%(m.ROC_auc),
+        if m.E_chi2 != None:
+            if m.E_chi2 == 0: m.E_chi2=1e-20
+            print >>f,  ' x2: %5.2f'%(nlog10(m.E_chi2)),
+        if m.CRA    != None:  print >>f,  ' cR: %6.4f'%(m.CRA),
+        if m.Cfrac  != None:  print >>f,  ' Cf: %6.4f'%(m.Cfrac),
+        if m.realpvalue != None: print >>f,  ' P: %6.4e'%(m.realpvalue),
+        if m.kellis != None:  print >>f,  ' k: %5.2f'%(m.kellis),
+        try:
+            if m.numbound      :  print >>f,  ' b: %3d'%(m.numbound),
+            if m.nummotif      :  print >>f,  ' nG: %3d'%(m.nummotif),
+            if m.numboundmotif :  print >>f,  ' bn: %3d'%(m.numboundmotif),
+        except: pass
+        print >>f, ''
+
+        _max = m.maxscore
+        m.maxscore = -100
+        if kmer_count >= 0:
+            seqs = m.bogus_kmers(kmer_count)
+        else:
+            seqs = m.seqs
+
+        for seq in seqs:
+            print >>f,  seq,i,m.scan(seq)[2][0]
+
+        m.maxscore = _max
+        print >>f,  '*'*m.width
+        print >>f,  "MAP Score: %f"%(m.MAP)
+
+def nlog10(x,min=1e-323):
+    """returns -log10(x) with a maximum default value of 323."""
+    if x < min: x=min
+    try:
+        return math.fabs(math.log(x)/math.log(10))
+    except:
+        return 0
+
+def txt2motifs(txt,VERBOSE=1):
+    """Convert a text string into a list of motifs:
+    Examples:
+
+    'TGASTCA,GAATC'      --> 2 motifs from ambiguity codes
+    'results.tamo'       --> All motifs in TAMO-format file
+    'results.tamo:34,45' --> Motifs 34 and 45 in TAMO-format file
+    'results.pickle'     --> All motifs in pickle (list or dict of Motifs)
+    'results.pickle%GAL4'   --> 'GAL4' entry in results.pickle dictionary
+    'results.pickle:34,45'  --> Motifs 34 and 45 in results.pickle list
+    """
+    motifs = []
+    exists = os.path.exists
+    toks   = txt.split(':')
+    if exists(toks[0]):               #It's a file!!
+        fname = toks[0]
+        if fname.find('.pickle') > 0: #It's a pickle!!
+            return pickletxt2motifs(toks)
+        else:                         #It's a "Motif" file!!
+            if VERBOSE:
+                print "# Loading motif from %s"%fname
+            allmotifs = load(fname)
+        if len(toks) == 1: motifs = allmotifs
+        else:
+            idxs   = [int(x) for x in toks[1].split(',')]
+            motifs = [allmotifs[x] for x in idxs]
+    else:                             #It's a text string!!
+        fname = 'TXT'
+        for t in txt.split(','):
+            motifs.append(Motif_from_text(t))
+    for i in range(len(motifs)): motifs[i].index = i
+    for i in range(len(motifs)): motifs[i].file = fname
+    return motifs
+
+def pickletxt2motifs(toks):
+    """[Utility function] See txt2motifs documentation."""
+    fname = toks[0]
+    print "# Loading motif pickle from %s"%fname
+    F = open(fname,'r')
+    DA = pickle.load(F)
+    F.close()
+    ans = []
+    if type(DA) == type({}):
+        if len(toks) > 1:
+            keys = [x.replace('%',' ') for x in toks[1].split(',')]
+            for k in keys: ans.append(DA[k])
+        else:
+            for k in DA.keys(): DA[k].key = k
+            ans = DA.values()
+    else: #Assuming DA is a list
+        if len(toks) > 1:
+            idxs = [int(x) for x in toks[1].split(',')]
+            ans  = [DA[x] for x in idxs]
+        else:
+            ans  = DA
+    return ans
+    
+
+def sortby(motiflist, property, REV=0):
+    """Sort a motif list according to a particular property"""
+    mtype = type(Motif())
+    for m in motiflist:
+        if type(m) != mtype:
+            print "Not a Motif Object: ",m
+            return
+    try:
+        motiflist.sort(lambda x,y,p=property: cmp(x.__dict__[p],y.__dict__[p]))
+        if REV: motiflist.reverse()
+    except:
+        print 'Could not sort list.  Probably, the specified property "%s" is not possessed by all motifs'%property
+    
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/src/chipsequtil/nib.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,393 @@
+'''Functions and classes used to interface with .nib files as created by Jim
+Kent's nibFrag and faToNib utilities.'''
+
+import glob
+import math
+import os
+import struct
+import sys
+import warnings
+from cStringIO import StringIO
+from collections import defaultdict as dd
+
+from chipsequtil import reverse_complement, get_file_parts, BEDFile
+
+
+# module fields
+NOMASK,MASK,HARDMASK = range(3)
+
+
+class NibException(Exception) : pass
+
+
+def _nib_fd(nib) :
+    '''Returns filename and open file object for nib, detecting whether it was
+    passed as a path or as a file object'''
+
+    # check to see if nib is a file or a string
+    if isinstance(nib,file) :
+        nib_fn = nib.name
+        nib.seek(0)
+        nib_f = nib
+    elif isinstance(nib,str) :
+        nib_fn = nib
+        nib_f = open(nib,'rb')
+    else :
+        raise NibException('Incompatible .nib argument %s with type %s, needs to \
+        be either <type \'file\'> or <type \'str\'>'%(str(nib),type(nib)))
+
+    return nib_fn, nib_f
+
+
+def get_nib(nib,start=0,end=-1,strand='+',mask=NOMASK,name=None,dbHeader=None,tbaHeader=None) :
+    '''Return a (header,sequence) tuple representing this nibFrag record'''
+    headers = get_nib_header_batch(nib,[(start,end,strand,name,dbHeader,tbaHeader),])
+    seqs = get_nib_seq_batch(nib,[(start,end,strand)],mask)
+    return headers[0], seqs[0]
+
+
+def get_nib_batch(nib,queries,mask=NOMASK) :
+    '''Batch interface for fetching fasta records.  Returns tuple of lists
+    (headers,sequences)'''
+    headers = get_nib_header_batch(nib,queries)
+    seqs = get_nib_seq_batch(nib,[x[:3] for x in queries],mask=mask)
+    return headers, seqs
+
+
+def get_nib_seq(nib,start=0,end=-1,strand='+',mask=NOMASK) :
+    '''Extract subsequence from .nib file like Jim Kent's nibFrag utility.
+    Default behavior is to return the entire sequence.
+
+    Extract the nucleotide substring defined by the closed interval [start,end]
+    from the sequence found in *nib_fn*.  *mask* parameter has the following
+    possible values:
+
+    chipsequtil.nib.NOMASK -- masking is not indicated, everything lower case (default)
+    chipsequtil.nib.MASK -- masked positions are lower case, normal bases capitalized
+    chipsequtil.nib.HARDMASK -- masked positions are replaced with Ns, normal bases capitalized
+    '''
+    return get_nib_seq_batch(nib,[(start,end,strand)],mask)[0]
+
+
+def get_nib_header(nib,start=0,end=-1,strand='+',name=None,dbHeader=None,tbaHeader=None) :
+    '''Method for constructing fasta headers compliant with nibFrag utility'''
+    headers = get_nib_header_batch(nib,[(start,end,strand,name,dbHeader,tbaHeader),])
+    return headers[0]
+
+
+def get_nib_header_batch(nib,queries) :
+    '''Batch method for creating nibFrag headers.  *queries* is a list of at most
+    6-tuples (start,end,strand,name,dbHeader,tbaHeader) representing queries as
+    specified by the original nibFrag utility.  Only start, end, and strand
+    fields are required.'''
+
+    nib_path, nib_f = _nib_fd(nib)
+
+    nib_dir,nib_fn,nib_base,nib_ext = get_file_parts(nib_path)
+    nbases = validate_nib_file(nib)
+    headers = []
+    header_tmpl = '>%(name)s%(db)s\n'
+
+    for rec in queries :
+
+        # set some defaults if they are not supplied
+        rec = list(rec)
+        rec.extend([None]*(6-len(rec)))
+        start, end, strand, name, dbHeader, tbaHeader  = rec
+
+        if end == -1 :
+            end = nbases
+        fields = {}
+        fields['name'] = nib_path+':%d-%d'%(start,end) if not name else name
+        fields['db'] = ''
+
+        if tbaHeader :
+            # ignored for some reason in nibFrag when tbaHeader supplied and dbHeader is not
+            fields['name'] = '' if not dbHeader else fields['name']
+            fields['db'] = '%s.%s:%d-%d of %d'%(tbaHeader,nib_base,start,end,nbases)
+        if dbHeader :
+            fields['db'] = ':%s.%s:%d-%d:%s:%d'%(dbHeader,nib_base,start,end,strand,nbases)
+
+        headers.append(header_tmpl%fields)
+
+    return headers
+
+
+def validate_nib_file(nib) :
+    '''Validate .nib file header, returning number of bases indicated if successful.
+    *nib* argument is either a filename or an open file object.
+    '''
+
+    nib_fn, nib_f = _nib_fd(nib)
+
+    # first 4 bytes are a nib file signature
+    #TODO - consider attempting to figure out byte order to make truly cross platform
+    def_sig = 0x6BE93D3A
+    sig = struct.unpack('=l',nib_f.read(4))[0]
+    if def_sig != sig :
+        raise NibException('Invalid nib file signature in %s, found %s, expected \
+        %s, perhaps the .nib file was not created on this platform?\n\nnibFrag style \
+        error: %s is not a good .nib file.'%(nib_fn,hex(sig),hex(def_sig),nib_fn))
+
+    # second 4 bytes are number of bases in sequence
+    nbases = struct.unpack('=l',nib_f.read(4))[0]
+
+    return nbases
+
+
+def get_nib_seq_batch(nib,queries,mask=NOMASK) :
+    '''Extract subsequence from .nib file like Jim Kent's nibFrag utility.
+
+    Extract the nucleotide substrings defined by the closed intervals in *queries*
+    from the sequence found in *nib*.  *nib* argument is either a filename or
+    an open file object.  Entries in *queries* are 3-tuples defining (start,end,strand)
+    sequence coordinates. Sequences are returned in order in a list as
+    strings.  *mask* parameter has the following possible values:
+
+    chipsequtil.nib.NOMASK -- masking is not indicated, everything lower case (default)
+    chipsequtil.nib.MASK -- masked positions are lower case, normal bases capitalized
+    chipsequtil.nib.HARDMASK -- masked positions are replaced with Ns, normal bases capitalized
+    '''
+
+    nib_fn, nib_f = _nib_fd(nib)
+
+    nbases = validate_nib_file(nib_f)
+
+    # rest of file is sequence, with each nibble (4 bits) encoding a base as
+    # follows (from http://genome.ucsc.edu/FAQ/FAQformat.html#format8) :
+    #
+    # 0 - T
+    # 1 - C
+    # 2 - A
+    # 3 - G
+    # 4 - N
+    #
+    # The most significant bit in a nibble is set if the base is masked
+    trans_nuc = 'tcagn'
+
+    # start translating the nibbles into nucleotides
+    def trans_nib(nibble) :
+        nuc = trans_nuc[nibble&7]
+        mask_bit = nibble & 8
+        if mask in [MASK,HARDMASK] and mask_bit == 0 :
+            return nuc.upper()
+        if mask == HARDMASK and mask_bit != 0 : # mask_bit is 0 or 8, never 1
+            return 'N'
+        return nuc
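+    # worked example: byte 0x19 holds nibbles 0x1 (high) and 0x9 (low).  0x1 ->
+    # 'c', unmasked, so 'C' under MASK/HARDMASK; 0x9 has the mask bit (8) set and
+    # base bits 1 -> 'c': lower case under MASK, 'N' under HARDMASK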
+
+    seqs = [None]*len(queries) # sequences, restored to the original query order
+
+    # visit the queries in ascending coordinate order so we walk forward through
+    # the file, but remember each query's original position so results line up
+    # with the input (and the caller's list is not reordered in place)
+    order = sorted(range(len(queries)), key=lambda i: queries[i])
+
+    for q_idx in order :
+        start, end, strand = queries[q_idx]
+
+        if start < 0 :
+            raise NibException('Received negative start coordinate, this may '\
+                               'indicate a region on mitochondrial DNA that '\
+                               'spans reference sequence start and end.  This '\
+                               'utility cannot handle these cases, aborting. '\
+                               'Requested interval: %s (%d,%d)'%(nib_fn,start,end))
+
+        start, end = map(int,(start,end))
+
+        # end == -1 means caller wants entire sequence
+        if end == -1  :
+            end = nbases
+
+        if any([nbases < c for c in [start,end]]) :
+            raise NibException(('Requested slice (%(start)d,%(end)d) not compatible ' \
+            'with sequence of length %(nbases)d in %(nib_fn)s, aborting\n\nnibFrag '\
+            'style error: nib read past end of file (%(start)d %(end)d) in file: '\
+            '%(nib_fn)s')%{'start':start,'end':end,'nbases':nbases,'nib_fn':nib_fn})
+
+        # figure out how many bytes to read through
+        start_byte,rem_byte = start/2,start%2
+
+        # calculate where we need to move to in the file from the current location
+        # + 8 is from the 2*4 bytes header info in the .nib format
+        byte_offset = start_byte-nib_f.tell() + 8
+        nib_f.seek(byte_offset,1) # seek forward to the beginning byte from current location
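+        # two bases per byte: if start is odd (rem_byte==1) the first base shares
+        # its byte with the previous base, and seq_rem_byte flags an extra byte
+        # read when end is even; both overhangs are clipped below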
+        seq_bytes,seq_rem_byte = int(math.ceil((end-start+rem_byte)/2.)),(end+1)%2
+        seq_bytes = nib_f.read(seq_bytes+seq_rem_byte)
+
+        # start translating the bytes
+        seq = StringIO() # we use StringIO because it is more efficient than concatenating strings
+        for c in seq_bytes :
+            c_byte = struct.unpack('=b',c)[0]
+
+            # higher nibble
+            c_nib = (c_byte & (15<<4))>>4
+            nuc = trans_nib(c_nib)
+            seq.write(nuc)
+
+            # lower nibble
+            c_nib = int(c_byte) & 15
+            nuc = trans_nib(c_nib)
+            seq.write(nuc)
+
+        # final nucleotide sequence
+        seq_str = seq.getvalue()
+
+        # if we're reading to the end, don't clip anything
+        if end != nbases :
+            # if the coordinate requested was not on a byte boundary, adjust
+            if rem_byte == 1 :
+                seq_str = seq_str[1:]
+            if seq_rem_byte == 1 :
+                seq_str = seq_str[:-1]
+
+            # nibFrag apparently uses zero-based indexing, clip off one base
+            seq_str = seq_str[:-1]
+        seq.close()
+
+        # adjust strand
+        if strand == '-' :
+            seq_str = reverse_complement(seq_str)
+        seqs[q_idx] = seq_str
+
+    return seqs
+
+
+class SeqDBException(Exception): pass
+class NibDBException(Exception): pass
+
+
+class SeqDB(object) :
+    '''Base class for different kinds of sequence databases.  Does nothing itself;
+    implement subclasses.  Constructor provides _db_map and db_info instance members.'''
+    def __init__(self) :
+        self._db_map = {}
+        self.db_info = dd(dict)
+
+    def get_seq(self,*args, **kwargs) :
+        raise SeqDBException('Base class SeqDB has no get_seq implementation')
+
+
+class NibDB(SeqDB) :
+    '''Class providing an interface to a set of .nib files as created by faToNib
+    in Jim Kent's software suite.
+
+    Sequences are identified by the basename of the .nib file without the .nib
+    extension, e.g. chr1.nib is identified as chr1.
+
+    Some potentially useful information about the entries in the database is
+    stored in the *db_info* dictionary.
+    '''
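+    # Illustrative usage (paths and coordinates are hypothetical):
+    #   db = NibDB(nib_dirs=['/data/genomes/mm9'])
+    #   header, seq = db.get_fasta('chr1',3000000,3000050,'+')
+    #   headers, seqs = db.get_fasta_from_bed('peaks.bed')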
+
+    def __init__(self,nib_fns=[],nib_dirs=[]) :
+        '''*nib_fns* is a list of paths to specific .nib files desired for the
+        NibDB.  *nib_dirs* is a list of paths to directories containing .nib
+        files such that every .nib file in the directories is added to the NibDB.
+        Explicitly passed files take precedence over those found in directories
+        when sequence names collide.
+        '''
+        SeqDB.__init__(self)
+
+        # find all *.nib files in the directories passed
+        if isinstance(nib_dirs,str) : # user just provided single directory
+            nib_dirs = [nib_dirs]
+
+        dir_nibs = []
+        for d in nib_dirs :
+            dir_nibs.extend(glob.glob(os.path.join(d,'*.nib')))
+
+        if isinstance(nib_fns,str) :
+            nib_fns = [nib_fns]
+        # for each .nib found, add to db
+        # if there is a collision of names, those specified in files (not dirs)
+        # takes precedence without warning
+        for fn in dir_nibs+nib_fns :
+
+            # open the nib file
+            nib_path,nib_fn,nib_base,nib_ext = get_file_parts(fn)
+            fn, nib_f = _nib_fd(fn)
+            self._db_map[nib_base] = nib_f
+
+            # store some info
+            self.db_info[nib_base]['path'] = fn
+            nbases = validate_nib_file(self._db_map[nib_base])
+            self.db_info[nib_base]['nbases'] = nbases
+
+    def __del__(self) :
+        '''Explicitly close all open .nib file handles
+        ("Explicit is better than implicit").
+        '''
+        for nib_f in self._db_map.values() :
+            nib_f.close()
+
+    def _get_db_map(self,name) :
+        '''Gets appropriate file handle for the requested name, raises NibDBException
+        if it cannot be found'''
+        try :
+            return self._db_map[name]
+        except KeyError :
+            raise NibDBException('Sequence name %s not found in NibDB'%name)
+
+    def get_fasta(self,name,start=0,end=-1,strand='+',mask=NOMASK) :
+        '''Get the fasta record for the specified arguments, returns (header,sequence)
+        tuple.'''
+
+        nib_f = self._get_db_map(name)
+        return get_nib(nib_f,start,end,strand,mask)
+
+    def get_fasta_batch(self,recs,mask=NOMASK) :
+        '''Batch version of the *get_fasta* method.  *recs* is a list of
+        lists/tuples of the form (<chromo>,<start>,<end>,<strand>). Returns a
+        list of (header,sequence) tuples in the same order as the input records.'''
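+        # Illustrative input (coordinates are hypothetical):
+        #   recs = [('chr1',100,200,'+'), ('chr2',50,150,'-')]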
+
+        # gather the records for each chromosome together
+        chrom_recs = dd(list)
+        for i,r in enumerate(recs) :
+            chrom_recs[r[0]].append((i,r)) # recs are (index,<tuple>)
+
+        # extract sequences
+        all_chrom_recs = []
+        for chrom, rec_list in chrom_recs.items() :
+            # sorted lists make sequence extraction efficient
+            rec_list.sort(key=lambda x: x[1][1]) # recs are (index,<tuple>)
+
+            # separate indexes from records, extract for this chromo
+            indexes, c_recs = zip(*rec_list)
+
+            # get_nib_batch requires list of (<start>,<end>,<strand>) tuples, remove
+            # chromo in first position
+            c_recs = [r[1:] for r in c_recs]
+
+            nib_f = self._get_db_map(chrom)
+            headers, seqs = get_nib_batch(nib_f,c_recs,mask)
+
+            # return the sequences to a (index,(header,sequence)) list
+            all_chrom_recs.extend(zip(indexes,zip(headers,seqs)))
+
+        # put the sequences back in the original order
+        all_chrom_recs.sort(key=lambda x: x[0]) # recs are (index,<tuple>) again
+        indexes, recs = zip(*all_chrom_recs)
+
+        return zip(*recs)
+
+    def get_fasta_from_bed(self,bed,mask=NOMASK) :
+        '''Accepts either a chipsequtil.BEDFile instance or a filename for a BED
+        file (used to construct a BEDFile instance) and returns the fasta
+        records for all records in order.'''
+
+        # determine if *bed* is a filename or a BEDFile
+        if isinstance(bed,str) : # filename
+            bed = BEDFile(bed)
+
+        # construct the records
+        recs = []
+        for rec in bed :
+            if rec['chrom'].lower().startswith('track') : # track line, skip
+                continue
+            recs.append((rec['chrom'],int(rec['chromStart']),int(rec['chromEnd']),rec['strand']))
+
+        return self.get_fasta_batch(recs,mask)
+
+    def get_seq(self,name,start=0,end=-1,strand='+',mask=NOMASK) :
+        '''Extract sequence from sequence *name*. Other arguments are passed
+        directly to *get_nib_seq* function.'''
+
+        nib_f = self._get_db_map(name)
+        return get_nib_seq(nib_f,start,end,strand,mask)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/src/chipsequtil/plotting.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,24 @@
+import math
+
+from matplotlib.pyplot import hist, plot, savefig, title, show, xticks, yticks, figure, clf
+
+from chipsequtil import get_gc_content
+
+def plot_gc_content(sequences,bins=10,fn=None) :
+    '''Plot a histogram of the GC content of *sequences* using *bins* bins.
+    If *fn* is provided the figure is saved to that filename, otherwise it is
+    displayed interactively.'''
+
+    # calculate all the GC contents, sort them
+    gc_contents = map(get_gc_content,sequences)
+    gc_contents.sort()
+
+    f = figure()
+    points = hist(gc_contents,bins=bins)
+    if fn :
+        savefig(fn)
+    else :
+        show()
+    clf()
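+
+# Illustrative call (the sequences and filename are hypothetical):
+#   plot_gc_content(['ACGTGCGC','ATATATGC'],bins=5,fn='gc_hist.png')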
+
+
+def plot_pos_neg_peaks(pos_peaks,neg_peaks) :
+    '''Plot # pos peaks/# neg peaks by p-value'''
+    pass
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/src/chipsequtil/sampling.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,252 @@
+
+import math
+import random
+import re
+import sys
+from collections import defaultdict
+
+from chipsequtil import get_org_settings, get_gc_content, get_gc_content_distribution, RefGeneFile
+from nib import NibDB, NibDBException
+
+def kl_divergence(p,q) :
+    """Return Kullback-Leibler divergence for two probability distributions
+    p and q.  p and q should be indexable objects of the same length where
+    p_i corresponds to q_i.
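+
+    A quick illustrative check (natural log, values rounded):
+
+      >>> round(kl_divergence([0.5,0.5],[0.9,0.1]),2)
+      0.51
+      >>> kl_divergence([0.25,0.75],[0.25,0.75])
+      0.0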
+    """
+    kl_sum = 0.
+    for p_i, q_i in zip(p,q) :
+        if p_i != 0 and q_i != 0 :
+            kl_sum += p_i * math.log(p_i/q_i)
+    return kl_sum
+
+def rejection_sample_bg(fg_dict,organism,bins=100,num_samples=None,verbose=False,
+                        bg_match_epsilon=1e-3) :
+    '''Generate background sequences according to the size, distance from genes,
+    and GC content distributions of the supplied foreground sequences.  *fg_dict*
+    is a dictionary of <header>:<sequence> items, where the first part of the
+    header must contain:
+
+    >chrX:<start>-<end>
+
+    *organism* is a string that is passed to the
+    *chipsequtil.get_org_settings* function and uses the 'genome_dir' and
+    'annotation_path' keys.
+    *bins* is the number of bins to use for representing the GC content
+    distribution.  Function returns a dictionary of <header>:<sequence> items
+    of generated background sequences.'''
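+    # Illustrative call (the organism label and sequences are hypothetical):
+    #   fg = {'chr1:1000-1200': 'ACGTACGT'}
+    #   bg = rejection_sample_bg(fg, 'mm9', verbose=True)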
+
+    nib_db = NibDB(nib_dirs=[get_org_settings(organism)['genome_dir']])
+    tss_fn = get_org_settings(organism)['annotation_path']
+    tss = defaultdict(list)
+    for rec in RefGeneFile(tss_fn) :
+        tss[rec['chrom']].append((int(rec['txStart']),int(rec['txEnd']),))
+
+    # for each peak find the chromosome, distance to nearest
+    # gene, size of peaks in bases, and GC content
+    num_samples = len(fg_dict) if not num_samples else num_samples
+    dists,sizes=[],[]
+
+    for header,seq in fg_dict.items() :
+
+        # chromosome first field in fasta headers from bed2seq.bedtoseq
+        chrom = header.split(':')[0]
+
+        # skip random/unplaced contigs and the mitochondrial chromosome
+        if re.search('random',chrom.lower()) or chrom.lower() == 'chrm' :
+            continue
+
+        # start first int in second field of bed2seq.bedtoseq header
+        start = int(header.split(':')[1].split('-')[0])
+        midpoint = start + len(seq)/2
+
+        # figure out which chromosome we're working on
+        tss_chr = tss[chrom]
+
+        # dists_to_genes is the distance of this peak from all the genes, find the minimum
+        dists_to_genes = [(s[0]-midpoint) for s in tss_chr]
+        try :
+            min_dist = min(dists_to_genes,key=lambda x : abs(x))
+            dists.append(min_dist)
+        except ValueError : # dists_to_genes was empty, no genes on this chromosome
+            err_str = 'Warning: no genes were found for sequence with header' \
+                         ' %s, not using to calculate distributions.\n'%header
+            sys.stderr.write(err_str)
+
+        # calculate # bases
+        sizes.append(len(seq))
+
+    # GC content distribution for the foreground sequences
+    gc_dist = get_gc_content_distribution(fg_dict.values(),bins=bins)
+
+    # max_gc is # peaks w/ highest GC content
+    max_gc = max(gc_dist)
+
+    # gene_starts is a list of all genes in (chromosome,gene start) tuples
+    gene_starts=[]
+    for key in tss.keys():
+        for x in tss[key]:
+            gene_starts.append((key,x[0]))
+
+    # encapsulated function for proposing sequences
+    def propose_sequence(dists, gene_starts, sizes, nib_db) :
+        # sample a random distance from the list of distances
+        d = random.choice(dists)
+
+        # pick a random gene
+        chrom, coord = random.choice(gene_starts)
+
+        # propose a starting point for the bg sequence
+        midpoint = coord-d+random.randint(-100,100)
+
+        # propose a size for the bg sequence
+        size = random.choice(sizes)
+        start = int(midpoint-int(size/2))
+        stop = int(midpoint+int(size/2))
+
+        #sys.stderr.write("%s:coord=%d size=%d midpoint=%d d=%d\n"%(chrom,coord,size,midpoint,d))
+        # if start or stop is negative the proposal is invalid, return no
+        # sequence so the caller skips it and tries again
+        if start < 0 or stop < 0 :
+            return '%s:%d-%d'%(chrom,start,stop), None
+
+        # randomly choose strand
+        strand = '+' if random.random() > 0.5 else '-'
+
+        # extract the proposed sequence
+        try :
+            nib_title, seq = nib_db.get_fasta(chrom,start,stop,strand)
+        except IOError, e :
+            if verbose : sys.stderr.write('IOError in NibDB, skipping: %s,%d-%d,%s\n'%(chrom,start,stop,strand))
+            seq = None
+        except NibDBException, e :
+            if verbose : sys.stderr.write('NibDB.get_fasta error, %s\n'%e)
+            seq = None
+
+        header = '%s:%d-%d'%(chrom,start,stop)
+
+        return header, seq
+
+
+    # build gc content distribution based on seq length and
+    # distance from TSS foreground distributions
+    # keep sampling sequences until the distribution stops
+    # changing a lot (KL divergence < epsilon)
+    bg_gc_cnts = [1.]*bins
+    converged = False
+    epsilon = bg_match_epsilon
+    if verbose : sys.stderr.write('Building empirical background GC content distribution\n')
+    while not converged :
+
+        # propose a sequence
+        header, seq = propose_sequence(dists,gene_starts,sizes,nib_db)
+
+        # sometimes this happens when there is an error, just try again
+        if seq is None :
+            continue
+
+        # determine the GC bin for this sequence
+        gc_content = get_gc_content(seq)
+        gc_bin = -1
+        for i in range(bins) :
+            win_start = i/float(bins)
+            win_end = (i+1)/float(bins)
+            if gc_content >= win_start and gc_content < win_end :
+                gc_bin = i
+                break
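+        # e.g. with bins=100 a gc_content of 0.42 falls in [0.42,0.43), i.e. gc_bin 42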
+
+        # update the gc content distribution
+        sum_cnts = float(sum(bg_gc_cnts))
+        if sum_cnts != 0 : # always true here since every bin starts at 1
+
+            # calculate the distributions before and after the new count,
+            # renormalizing after the count is added
+            last_gc_p = map(lambda x:x/sum_cnts,bg_gc_cnts)
+            bg_gc_cnts[gc_bin] += 1
+            new_gc_p = map(lambda x:x/(sum_cnts+1),bg_gc_cnts)
+
+            # calculate the kl divergence between last distribution
+            # and current one, stopping if less than epsilon
+            kl_d = kl_divergence(new_gc_p,last_gc_p)
+            if verbose : sys.stderr.write('dist to converge: %.3g\r'%(kl_d-epsilon))
+            if kl_d < epsilon :
+                converged = True
+
+        else :
+            bg_gc_cnts[gc_bin] += 1
+
+    if verbose : sys.stderr.write('\ndone\n')
+
+    # add pseudocounts to account for missing data in bg as to avoid
+    # inappropriate scaling in rejection sampling step
+    # the fg bin with the largest value that corresponds to an empty
+    # bg bin is used to calculate the number of pseudocounts so that
+    # the resulting bg bin has the same proportion of counts in it as
+    # the original fg bin.  This is calculated as:
+    #
+    # x_{pseudo} = \frac{p_i\sum_{i=1}^{N}a_i}{1-p_iN}
+    #
+    # where p_i is the value of the max fg bin w/ zero in the bg bin
+    # x_{pseudo} is added to every bin
+    pseudocounts = 0
+    for fg_i, bg_i in zip(gc_dist,bg_gc_cnts) :
+        if fg_i != 0 and bg_i == 0 and fg_i*len(fg_dict) > pseudocounts :
+            # the formula breaks down when fg_i >= 1/N since the denominator
+            # below becomes non-positive, but that *shouldn't* ever happen
+            if fg_i >= 1./len(bg_gc_cnts) :
+                raise Exception('There was a numeric issue in the rejection sampling routine, please try it again')
+            pseudocounts = (fg_i*sum(bg_gc_cnts))/(1-1.*fg_i*len(bg_gc_cnts))
+
+    bg_gc_cnts = map(lambda x: x+pseudocounts,bg_gc_cnts)
+    bg_gc_dist = map(lambda x: x/sum(bg_gc_cnts),bg_gc_cnts)
+
+    # last, find the multiplier that causes the background gc distribution to
+    # envelope the foreground gc dist
+    z_coeff = gc_dist[0]/bg_gc_dist[0]
+    for fg_i, bg_i in zip(gc_dist[1:],bg_gc_dist[1:]) :
+        z_coeff = max(z_coeff,fg_i/bg_i)
+    bg_gc_dist = map(lambda x: x*z_coeff,bg_gc_dist)
+
+    # start generating bg sequences
+    bg_dict = {}
+
+    bg_gcs,bg_sizes=[],[]
+
+    # generate a bg sequence for every fg sequence
+    for i in range(num_samples):
+        if verbose : sys.stderr.write('%d/%d'%(i,num_samples))
+
+        # propose sequences until one is accepted
+        accepted_sequence = False
+        while not accepted_sequence:
+            if verbose : sys.stderr.write('.')
+
+            # propose a sequence
+            header, seq = propose_sequence(dists,gene_starts,sizes,nib_db)
+
+            # a problem occurred in proposing the sequence, just keep going
+            if seq is None : continue
+
+            # determine the GC bin for this sequence
+            gc_content = get_gc_content(seq)
+            gc_bin = -1
+            # use b for the bin index so the sample counter i above is not clobbered
+            for b in range(bins) :
+                win_start = b/float(bins)
+                win_end = (b+1)/float(bins)
+                if gc_content >= win_start and gc_content < win_end :
+                    gc_bin = b
+                    break
+
+            # pick a uniform random number such that it does not exceed
+            # the maximum GC content distribution over bins
+            # if the random number is <= the GC content for this
+            # proposed sequence, accept, otherwise reject
+            r = random.random() * bg_gc_dist[gc_bin]
+            if r > gc_dist[gc_bin] :
+                continue
+            else:
+                bg_gcs.append(gc_content)
+                #bg_sizes.append(size)
+                accepted_sequence = True
+                bg_dict[header] = seq
+
+        if verbose : sys.stderr.write('\r')
+    return bg_dict
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/src/chipsequtil/seq.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,265 @@
+import gzip
+
+from itertools import izip
+from textwrap import fill
+
+# FASTA functions and classes
+def fasta_itr(f) :
+    '''Returns a generator that iterates through a FASTA formatted file.
+    *f* may be either a text or gzipped file, or a file-like python object
+    representing either of these.  Records are returned in the order they
+    are found.'''
+    if isinstance(f,str) :
+        f = open(f)
+
+    # check for the magic number 1f 8b indicating a gzipped file
+    if f.read(2) == "\x1f\x8b" : f = gzip.GzipFile(fileobj=f)
+    else : f.seek(0)
+
+    curr_header, curr_seq = None, None
+    for r in f :
+        if r.startswith('>') :
+            if curr_header is not None :
+                yield (curr_header, curr_seq)
+            curr_header = r[1:].strip()
+            curr_seq = ''
+        else :
+            curr_seq += r.strip()
+    # return the last record
+    yield (curr_header,curr_seq)
+
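+# Typical use (the filename is illustrative):
+#   for header, seq in fasta_itr('peaks.fa') :
+#       print header, len(seq)
+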
+def fasta_to_dict(f) :
+    '''Returns a dictionary whose keys are FASTA headers and values are
+    sequences.  *f* may be a text, gzipped file, or a file-like
+    python object representing either of these.'''
+    return dict(fasta_itr(f))
+
+def write_fasta_to_file(fasta,f,linelen=None) :
+    '''Writes the FASTA records in *fasta* to file specified in *f*. *fasta*
+    may be a dictionary like that returned by *fasta_to_dict* or a *FASTAFile*
+    instance.  *f* may be a filename or a file-like object opened with write
+    mode.'''
+    if isinstance(fasta,dict) :
+        fasta_itr = fasta.iteritems()
+    else :
+        fasta_itr = fasta
+
+    f_out = open(f,'w') if isinstance(f,str) else f
+
+    for header, seq in fasta_itr :
+        if linelen is not None :
+            seq = fill(seq,linelen)
+        f_out.write('>%s\n%s\n'%(header,seq))
+
+    # only close the handle if we opened it ourselves
+    if isinstance(f,str) :
+        f_out.close()
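+
+# Illustrative call (the records and filename are hypothetical):
+#   write_fasta_to_file({'seq1':'ACGTACGT'},'out.fa',linelen=60)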
+
+
+class FASTAFile(object) :
+    '''A file-like object providing information and statistics about the
+    sequences in a FASTA formatted file.  Efficiently iterates through a
+    text or gzipped FASTA file and provides sequential or random access to
+    the records.  Instances store header and sequence data as they are read.
+    
+      >>> from StringIO import StringIO
+      >>> fasta_str = StringIO(">seq1\\nACATAGGGAT\\n>seq2\\nTTATNTAGATA\\n")
+      >>> fasta_f = FASTAFile(fasta_str)
+      >>> [r for r in fasta_f]
+      [('seq1', 'ACATAGGGAT'), ('seq2', 'TTATNTAGATA')]
+      >>> fasta_f['seq1']
+      'ACATAGGGAT'
+      >>> fasta_f.headers
+      ['seq1', 'seq2']
+      >>> fasta_f.sequences
+      ['ACATAGGGAT', 'TTATNTAGATA']
+
+    Instances have the following members:
+
+    **headers**
+      list of FASTA headers in original order
+
+    **sequences**
+      list of FASTA sequences in original order
+
+    .. NOTE::
+       The members **headers** and **sequences** are not available until the
+       FASTA records have been iterated once.
+
+    When indexing like `fasta_f['seq1']`, the class assumes all headers are
+    unique; iterating does not make this assumption.
+    '''
+
+    def __init__(self,f) :
+        self._f = f
+        self._fasta_itr = fasta_itr(f)
+        self.headers = []
+        self.sequences = []
+        self._dict = {}
+
+    def __getitem__(self,key) :
+        return self._dict[key]
+
+    def __setitem__(self,key,val) :
+        self._dict[key] = val
+
+    def next(self) :
+        '''Returns next FASTA record in the file as (header, sequence) tuple.'''
+
+        if self._fasta_itr is None :
+            self._fasta_itr = izip(self.headers,self.sequences)
+
+        try :
+            header, seq = self._fasta_itr.next()
+        except StopIteration, e :
+            self._fasta_itr = None
+            self._f = None
+            raise e
+
+        if self._f is not None : 
+            # this means we're not done reading through the file yet
+            self.headers.append(header)
+            self.sequences.append(seq)
+            self._dict[header] = seq
+
+        return header, seq
+
+    def __iter__(self) :
+        return self
+
+# FASTQ functions and classes
+def fastq_itr(f) :
+    '''Returns a generator that iterates through a FASTQ formatted file.
+    *f* may be either a text or gzipped file, or a file-like python object
+    representing either of these.  Records are returned in the order they
+    are found.'''
+    if isinstance(f,str) :
+        f = open(f)
+
+    # check for the magic number 1f 8b indicating a gzipped file
+    if f.read(2) == "\x1f\x8b" : f = gzip.GzipFile(fileobj=f)
+    else : f.seek(0)
+
+    SEQ, QUAL = 0,1
+    in_region = SEQ
+    curr_header, curr_seq, curr_qual = None, None, None
+    for r in f :
+        if r.startswith('@') :
+            if curr_header is not None :
+                yield (curr_header, (curr_seq, curr_qual))
+            curr_header = r[1:].strip()
+            curr_seq = ''
+            curr_qual = ''
+            in_region = SEQ
+        elif r.startswith('+') :
+            in_region = QUAL
+        else :
+            curr_field = r.strip()
+            if in_region == SEQ :
+                curr_seq += curr_field
+            elif in_region == QUAL :
+                curr_qual += curr_field
+
+    # return the last record
+    yield (curr_header,(curr_seq,curr_qual))
+
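+# Typical use (the filename is illustrative); records are (header,(seq,qual)) tuples:
+#   for header, (seq, qual) in fastq_itr('reads.fastq') :
+#       assert len(seq) == len(qual)
+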
+def fastq_to_dict(f) :
+    '''Returns a dictionary whose keys are FASTQ headers and values are
+    sequences.  *f* may be a text, gzipped file, or a file-like
+    python object representing either of these.'''
+    return dict(fastq_itr(f))
+
+def write_fastq_to_file(fastq,f,linelen=None) :
+    '''Writes the FASTQ records in *fastq* to the file specified in *f*. *fastq*
+    may be a dictionary like that returned by *fastq_to_dict* or a *FASTQFile*
+    instance.  *f* may be a filename or a file-like object opened with write
+    mode.'''
+    if isinstance(fastq,dict) :
+        fastq_itr = fastq.iteritems()
+    else :
+        fastq_itr = fastq
+
+    f_out = open(f,'w') if isinstance(f,str) else f
+
+    for header, (seq, qual) in fastq_itr :
+        if linelen is not None :
+            # wrap sequence and quality identically so the lines stay in register
+            seq = fill(seq,linelen)
+            qual = fill(qual,linelen)
+        f_out.write('@%s\n%s\n+\n%s\n'%(header,seq,qual))
+
+    if isinstance(f,str) :
+        f_out.close()
+
+
+class FASTQFile(object) :
+    '''A file-like object providing information and statistics about the
+    sequences in a FASTQ formatted file.  Efficiently iterates through a
+    text or gzipped FASTQ file and provides sequential or random access to
+    the records.  Instances store header, sequence, and quality data as they
+    are read.
+
+      >>> from StringIO import StringIO
+      >>> fastq_str = StringIO("@seq1\\nACATAGGGAT\\n+\\nY^_cccQYJQ\\n@seq2\\nTTATNTAGAT\\n+\\nY^_cJcQQJQ\\n")
+      >>> fastq_f = FASTQFile(fastq_str)
+      >>> [r for r in fastq_f]
+      [('seq1', ('ACATAGGGAT', 'Y^_cccQYJQ')), ('seq2', ('TTATNTAGAT', 'Y^_cJcQQJQ'))]
+      >>> fastq_f['seq1']
+      ('ACATAGGGAT', 'Y^_cccQYJQ')
+      >>> fastq_f.headers
+      ['seq1', 'seq2']
+      >>> fastq_f.sequences
+      ['ACATAGGGAT', 'TTATNTAGAT']
+      >>> fastq_f.quals
+      ['Y^_cccQYJQ', 'Y^_cJcQQJQ']
+
+    Instances have the following members:
+
+    **headers**
+      list of FASTQ headers in original order
+
+    **sequences**
+      list of FASTQ sequences in original order
+
+    **quals**
+      list of FASTQ quality scores in original order
+
+    .. NOTE::
+       The members **headers**, **sequences**, and **quals** are not available
+       until the FASTQ records have been iterated once.
+
+    When indexing like `fastq_f['seq1']`, the class assumes all headers are
+    unique; iterating does not make this assumption.
+    '''
+
+    def __init__(self,f) :
+        self._f = f
+        self._fastq_itr = fastq_itr(f)
+        self.headers = []
+        self.sequences = []
+        self.quals = []
+        self._dict = {}
+
+    def __getitem__(self,key) :
+        return self._dict[key]
+
+    def __setitem__(self,key,val) :
+        self._dict[key] = val
+
+    def next(self) :
+        '''Returns next FASTQ record in the file as a (header, (sequence, quality)) tuple.'''
+
+        if self._fastq_itr is None :
+            self._fastq_itr = izip(self.headers,izip(self.sequences,self.quals))
+
+        try :
+            header, (seq, qual) = self._fastq_itr.next()
+        except StopIteration, e :
+            self._fastq_itr = None
+            self._f = None
+            raise e
+
+        if self._f is not None : 
+            # this means we're not done reading through the file yet
+            self.headers.append(header)
+            self.sequences.append(seq)
+            self.quals.append(qual)
+            self._dict[header] = (seq, qual)
+
+        return header, (seq, qual)
+
+    def __iter__(self) :
+        return self
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/src/chipsequtil/util.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,131 @@
+"""Utility/helper classes and functions used by the chipsequtil package.
+"""
+
+import textwrap
+
+from optparse import IndentedHelpFormatter
+
+class MultiLineHelpFormatter(IndentedHelpFormatter) :
+    """An OptionParser formatter that preserves newline characters in
+    description and epilog fields and word-wraps all sequences of text
+    not interrupted by newline characters.
+    """
+
+    def _format_text(self, text) :
+        """Wrap paragraphs of text individually separated by
+        newlines (preserves explicit newline characters).
+        """
+        text_width = self.width - self.current_indent
+        indent = " "*self.current_indent
+        output_text = []
+        paragraphs = text.split('\n')
+        for p in paragraphs :
+            output_text.append(textwrap.fill(p,
+                                             text_width,
+                                             initial_indent=indent,
+                                             subsequent_indent=indent))
+        return '\n'.join(output_text)
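+
+# Illustrative use (the option parser text is hypothetical):
+#   from optparse import OptionParser
+#   parser = OptionParser(description='First paragraph.\nSecond paragraph.',
+#                         formatter=MultiLineHelpFormatter())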
+
+
+
+
+# A binary ordered tree example
+# shamelessly copied from: http://code.activestate.com/recipes/286239-binary-ordered-tree/
+class CNode:
+    left , right, data = None, None, 0
+
+    def __init__(self, data):
+        # initializes the data members
+        self.left = None
+        self.right = None
+        self.data = data
+
+
+class KeyedBinaryTree : # do this later...
+    pass
+
+
+class CBOrdTree:
+    def __init__(self):
+        # initializes the root member
+        self.root = None
+
+    def addNode(self, data):
+        # creates a new node and returns it
+        return CNode(data)
+
+    def insert(self, root, data):
+        # inserts new data
+        if root == None:
+            # if there isn't any data
+            # adds it and returns
+            return self.addNode(data)
+        else:
+            # enters into the tree
+            if data <= root.data:
+                # if the data is less than the stored one
+                # goes into the left-sub-tree
+                root.left = self.insert(root.left, data)
+            else:
+                # processes the right-sub-tree
+                root.right = self.insert(root.right, data)
+            return root
+
+    def lookup(self, root, target):
+        # looks for a value in the tree
+        if root == None:
+            return 0
+        else:
+            # if it has found it...
+            if target == root.data:
+                return 1
+            else:
+                if target < root.data:
+                    # left side
+                    return self.lookup(root.left, target)
+                else:
+                    # right side
+                    return self.lookup(root.right, target)
+
+    def minValue(self, root):
+        # goes down into the left
+        # arm and returns the last value
+        while(root.left != None):
+            root = root.left
+        return root.data
+
+    def maxDepth(self, root):
+        if root == None:
+            return 0
+        else:
+            # computes the two depths
+            ldepth = self.maxDepth(root.left)
+            rdepth = self.maxDepth(root.right)
+            # returns the appropriate depth
+            return max(ldepth, rdepth) + 1
+
+    def size(self, root):
+        if root == None:
+            return 0
+        else:
+            return self.size(root.left) + 1 + self.size(root.right)
+
+    def printTree(self, root):
+        # prints the tree path
+        if root == None:
+            pass
+        else:
+            self.printTree(root.left)
+            print root.data,
+            self.printTree(root.right)
+
+    def printRevTree(self, root):
+        # prints the tree path in reverse
+        # order
+        if root == None:
+            pass
+        else:
+            self.printRevTree(root.right)
+            print root.data,
+            self.printRevTree(root.left)
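+
+# Illustrative use of the recipe:
+#   tree = CBOrdTree()
+#   root = None
+#   for v in [5,2,8,1] : root = tree.insert(root, v)
+#   tree.printTree(root)     # prints: 1 2 5 8
+#   tree.lookup(root, 8)     # returns 1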
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chipsequtil-master/uninstall.py	Mon Mar 07 16:18:10 2016 -0500
@@ -0,0 +1,31 @@
+#TODO this doesn't work yet - consider doing this later, use the install 
+# -f|--force option for now
+
+# distutils doesn't handle uninstalling things, this class deletes all the files
+# this package installs if it has appropriate permissions to do it, otherwise
+# prints out the files that must be deleted to uninstall
+import os
+
+from distutils.command.build_py import build_py
+
+class uninstall(build_py) :
+  def run(self) :
+
+
+    # delete modules
+    print self.distribution.py_modules
+
+    # delete extensions
+    print self.distribution.ext_modules
+
+    # delete packages
+    print self.distribution.packages
+
+    # delete package data
+    print self.distribution.package_data
+
+    # delete scripts
+    print self.distribution.scripts
+
+    print self.distribution.get_command_obj('install').get_outputs()
+
+  def remove_path(self,path) :
+    '''Attempt to remove the specified path, returning non-zero status code on error'''
+    try :
+      os.remove(path)
+      return 0
+    except OSError :
+      return 1