Mercurial > repos > alenail > chipsequtil_old
changeset 5:b2a929e58437 draft
Uploaded
author | alenail |
---|---|
date | Mon, 28 Mar 2016 12:31:17 -0400 |
parents | 19d80c995d7b |
children | 2ef1fca6d530 |
files | chipsequtil-master/docs/._Makefile chipsequtil-master/docs/._get_script_help.py chipsequtil-master/docs/._source chipsequtil-master/docs/Makefile chipsequtil-master/docs/get_script_help.py chipsequtil-master/docs/source/._conf.py chipsequtil-master/docs/source/._index.rst chipsequtil-master/docs/source/._module_reference.rst chipsequtil-master/docs/source/._module_src chipsequtil-master/docs/source/._quick_start.rst chipsequtil-master/docs/source/._script_reference.rst chipsequtil-master/docs/source/conf.py chipsequtil-master/docs/source/index.rst chipsequtil-master/docs/source/module_reference.rst chipsequtil-master/docs/source/module_src/._chipsequtil.rst chipsequtil-master/docs/source/module_src/._file_wrappers.rst chipsequtil-master/docs/source/module_src/._motiftools.rst chipsequtil-master/docs/source/module_src/._nib.rst chipsequtil-master/docs/source/module_src/._org_settings.rst chipsequtil-master/docs/source/module_src/._seq.rst chipsequtil-master/docs/source/module_src/._util.rst chipsequtil-master/docs/source/module_src/chipsequtil.rst chipsequtil-master/docs/source/module_src/file_wrappers.rst chipsequtil-master/docs/source/module_src/motiftools.rst chipsequtil-master/docs/source/module_src/nib.rst chipsequtil-master/docs/source/module_src/org_settings.rst chipsequtil-master/docs/source/module_src/seq.rst chipsequtil-master/docs/source/module_src/util.rst chipsequtil-master/docs/source/quick_start.rst chipsequtil-master/docs/source/script_reference.rst chipsequtil-master/examples/._mapping chipsequtil-master/examples/._nib chipsequtil-master/examples/._seq chipsequtil-master/examples/mapping/._map_to_known_gene.sh chipsequtil-master/examples/mapping/._test_peaks.xls chipsequtil-master/examples/mapping/map_to_known_gene.sh chipsequtil-master/examples/mapping/test_peaks.xls chipsequtil-master/examples/nib/._shuffled_peaks.bed chipsequtil-master/examples/nib/._test_batch_fasta.py chipsequtil-master/examples/nib/._test_nib_db.py 
chipsequtil-master/examples/nib/shuffled_peaks.bed chipsequtil-master/examples/nib/test_batch_fasta.py chipsequtil-master/examples/nib/test_nib_db.py chipsequtil-master/examples/seq/._test_chipsequtil_seq.py chipsequtil-master/examples/seq/test_chipsequtil_seq.py chipsequtil-master/scripts/._THEME.sh chipsequtil-master/scripts/._build_chipseq_infosite.py chipsequtil-master/scripts/._chipseq_pipeline.py chipsequtil-master/scripts/._chipseq_pipeline_wo_ctrl.py chipsequtil-master/scripts/._combine_gerald_stats.py chipsequtil-master/scripts/._compare_microarray_binding.py chipsequtil-master/scripts/._construct_bg_fasta.py chipsequtil-master/scripts/._create_pipeline_script.py chipsequtil-master/scripts/._extract_promoters.py chipsequtil-master/scripts/._filter_bed_by_position_count.py chipsequtil-master/scripts/._filter_gps_peaks.py chipsequtil-master/scripts/._filter_macs_peaks.py chipsequtil-master/scripts/._filter_mapped_known_genes.py chipsequtil-master/scripts/._generate_stats_doc.py chipsequtil-master/scripts/._gerald_stats.py chipsequtil-master/scripts/._gerald_to_bed.py chipsequtil-master/scripts/._integrate_macs_ucsc.py chipsequtil-master/scripts/._join_mapped_known_genes.py chipsequtil-master/scripts/._kg_to_gff.py chipsequtil-master/scripts/._map_intervals.py chipsequtil-master/scripts/._map_peaks_to_genes.py chipsequtil-master/scripts/._map_peaks_to_known_genes.py chipsequtil-master/scripts/._motif_scan.py chipsequtil-master/scripts/._nibFrag.py chipsequtil-master/scripts/._org_settings.py chipsequtil-master/scripts/._peaks_to_fasta.py chipsequtil-master/scripts/._plot_peak_loc_dist.py chipsequtil-master/scripts/._plot_pos_vs_neg_peaks.py chipsequtil-master/scripts/._probeset_to_known_gene.py chipsequtil-master/scripts/._rejection_sample_fasta.py chipsequtil-master/scripts/._sort_bed.py chipsequtil-master/scripts/._split_file.py chipsequtil-master/scripts/._split_qsub.py chipsequtil-master/scripts/._wait_for_jobid.py 
chipsequtil-master/scripts/._wait_for_qsub.py chipsequtil-master/scripts/._wqsub.py chipsequtil-master/scripts/._wqsub_drmaa.py chipsequtil-master/scripts/THEME.sh chipsequtil-master/scripts/build_chipseq_infosite.py chipsequtil-master/scripts/chipseq_pipeline.py chipsequtil-master/scripts/chipseq_pipeline_wo_ctrl.py chipsequtil-master/scripts/combine_gerald_stats.py chipsequtil-master/scripts/compare_microarray_binding.py chipsequtil-master/scripts/construct_bg_fasta.py chipsequtil-master/scripts/create_pipeline_script.py chipsequtil-master/scripts/extract_promoters.py chipsequtil-master/scripts/filter_bed_by_position_count.py chipsequtil-master/scripts/filter_gps_peaks.py chipsequtil-master/scripts/filter_macs_peaks.py chipsequtil-master/scripts/filter_mapped_known_genes.py chipsequtil-master/scripts/generate_stats_doc.py chipsequtil-master/scripts/gerald_stats.py chipsequtil-master/scripts/gerald_to_bed.py chipsequtil-master/scripts/integrate_macs_ucsc.py chipsequtil-master/scripts/join_mapped_known_genes.py chipsequtil-master/scripts/kg_to_gff.py chipsequtil-master/scripts/map_intervals.py chipsequtil-master/scripts/map_peaks_to_genes.py chipsequtil-master/scripts/map_peaks_to_known_genes.py chipsequtil-master/scripts/motif_scan.py chipsequtil-master/scripts/nibFrag.py chipsequtil-master/scripts/org_settings.py chipsequtil-master/scripts/peaks_to_fasta.py chipsequtil-master/scripts/plot_peak_loc_dist.py chipsequtil-master/scripts/plot_pos_vs_neg_peaks.py chipsequtil-master/scripts/probeset_to_known_gene.py chipsequtil-master/scripts/rejection_sample_fasta.py chipsequtil-master/scripts/sort_bed.py chipsequtil-master/scripts/split_file.py chipsequtil-master/scripts/split_qsub.py chipsequtil-master/scripts/wait_for_jobid.py chipsequtil-master/scripts/wait_for_qsub.py chipsequtil-master/scripts/wqsub.py chipsequtil-master/scripts/wqsub_drmaa.py chipsequtil-master/src/._chipsequtil chipsequtil-master/src/chipsequtil/.___init__.py 
chipsequtil-master/src/chipsequtil/._chipsequtil.py chipsequtil-master/src/chipsequtil/._motiftools.py chipsequtil-master/src/chipsequtil/._nib.py chipsequtil-master/src/chipsequtil/._plotting.py chipsequtil-master/src/chipsequtil/._sampling.py chipsequtil-master/src/chipsequtil/._seq.py chipsequtil-master/src/chipsequtil/._util.py chipsequtil-master/src/chipsequtil/__init__.py chipsequtil-master/src/chipsequtil/chipsequtil.py chipsequtil-master/src/chipsequtil/motiftools.py chipsequtil-master/src/chipsequtil/nib.py chipsequtil-master/src/chipsequtil/plotting.py chipsequtil-master/src/chipsequtil/sampling.py chipsequtil-master/src/chipsequtil/seq.py chipsequtil-master/src/chipsequtil/util.py chipsequtil/map_to_known_genes.py chipsequtil/map_to_known_genes.xml chipsequtil/tool_dependencies.xml |
diffstat | 139 files changed, 297 insertions(+), 12242 deletions(-) [+] |
line wrap: on
line diff
--- a/chipsequtil-master/docs/Makefile Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,89 +0,0 @@ -# Makefile for Sphinx documentation -# - -# You can set these variables from the command line. -SPHINXOPTS = -SPHINXBUILD = sphinx-build -PAPER = -BUILDDIR = build - -# Internal variables. -PAPEROPT_a4 = -D latex_paper_size=a4 -PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source - -.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest - -help: - @echo "Please use \`make <target>' where <target> is one of" - @echo " html to make standalone HTML files" - @echo " dirhtml to make HTML files named index.html in directories" - @echo " pickle to make pickle files" - @echo " json to make JSON files" - @echo " htmlhelp to make HTML files and a HTML help project" - @echo " qthelp to make HTML files and a qthelp project" - @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" - @echo " changes to make an overview of all changed/added/deprecated items" - @echo " linkcheck to check all external links for integrity" - @echo " doctest to run all doctests embedded in the documentation (if enabled)" - -clean: - -rm -rf $(BUILDDIR)/* - -html: - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." - -dirhtml: - $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." - -pickle: - $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle - @echo - @echo "Build finished; now you can process the pickle files." - -json: - $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json - @echo - @echo "Build finished; now you can process the JSON files." 
- -htmlhelp: - $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp - @echo - @echo "Build finished; now you can run HTML Help Workshop with the" \ - ".hhp project file in $(BUILDDIR)/htmlhelp." - -qthelp: - $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp - @echo - @echo "Build finished; now you can run "qcollectiongenerator" with the" \ - ".qhcp project file in $(BUILDDIR)/qthelp, like this:" - @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/ChIPSeqUtil.qhcp" - @echo "To view the help file:" - @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/ChIPSeqUtil.qhc" - -latex: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo - @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." - @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ - "run these through (pdf)latex." - -changes: - $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes - @echo - @echo "The overview file is in $(BUILDDIR)/changes." - -linkcheck: - $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck - @echo - @echo "Link check complete; look for any errors in the above output " \ - "or in $(BUILDDIR)/linkcheck/output.txt." - -doctest: - $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest - @echo "Testing of doctests in the sources finished, look at the " \ - "results in $(BUILDDIR)/doctest/output.txt."
--- a/chipsequtil-master/docs/get_script_help.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,99 +0,0 @@ -#!/usr/bin/env python - -import glob -import signal -import time -from subprocess import Popen, PIPE -from textwrap import TextWrapper - -class Alarm(Exception): - pass - -def alarm_handler(signum, frame): - raise Alarm - -signal.signal(signal.SIGALRM, alarm_handler) - -scripts = [#'../scripts/build_chipseq_infosite.py', - '../scripts/chipseq_pipeline.py', - #'../scripts/combine_gerald_stats.py', - #'../scripts/compare_microarray_binding.py', - '../scripts/create_pipeline_script.py', - '../scripts/extract_promoters.py', - '../scripts/filter_bed_by_position_count.py', - '../scripts/filter_macs_peaks.py', - '../scripts/filter_gps_peaks.py', - '../scripts/filter_mapped_known_genes.py', - #'../scripts/generate_stats_doc.py', - '../scripts/gerald_stats.py', - '../scripts/gerald_to_bed.py', - #'../scripts/integrate_macs_ucsc.py', - '../scripts/join_mapped_known_genes.py', - '../scripts/map_intervals.py', - '../scripts/map_peaks_to_genes.py', - '../scripts/map_peaks_to_known_genes.py', - '../scripts/motif_scan.py', - '../scripts/nibFrag.py', - '../scripts/org_settings.py', - '../scripts/peaks_to_fasta.py', - '../scripts/plot_pos_vs_neg_peaks.py', - '../scripts/plot_peak_loc_dist.py', - #'../scripts/probeset_to_known_gene.py', - '../scripts/rejection_sample_fasta.py', - '../scripts/sort_bed.py', - #'../scripts/split_file.py', - #'../scripts/split_qsub.py', - #'../scripts/THEME.sh', - #'../scripts/wait_for_qsub.py', - '../scripts/wait_for_jobid.py', - '../scripts/wqsub.py', - '../scripts/wqsub_drmaa.py', - ] - -if __name__ == '__main__' : - - tw = TextWrapper(initial_indent=" ",subsequent_indent=" ") - script_help_out = '' - refs = '' - for script in scripts : - cmd = 'python %s -h'%script - p = Popen(cmd,shell=True,stdout=PIPE,stderr=PIPE) - - stdout, stderr = None, None - signal.alarm(3) # 3 seconds - try: - stdout, stderr = 
p.communicate() - signal.alarm(0) # reset the alarm - except Alarm: - pass - - script_str = script.replace('../scripts/','') - - - refs += ' - :ref:`%(script_str)s <%(script_str)s>`\n'%{'script_str':script_str} - script_help_out += '.. _%s:\n\n'%script_str - script_help_out += '%s::\n\n'%script_str - if stderr is None : - script_help_out += tw.fill('empty docstring\n') - else : - script_help_out += '\n'.join([' '+x for x in stdout.split('\n')]) - script_help_out += '\n'.join([' '+x for x in stderr.split('\n')]) - script_help_out += '\n\n' - script_help_out += ':ref:`top <top>`\n\n' - - rst_str = """\ -Illumina pipeline script reference -================================== - -The following is the output of the scripts provided by this package when invoked -on the command line with *-h*. - -.. _top: - -Scripts: -%(refs)s - -%(script_help_out)s -"""%{'refs':refs,'script_help_out':script_help_out} - - print rst_str
--- a/chipsequtil-master/docs/source/conf.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,198 +0,0 @@ -# -*- coding: utf-8 -*- -# -# ChIPSeqUtil documentation build configuration file, created by -# sphinx-quickstart on Mon Oct 31 13:12:52 2011. -# -# This file is execfile()d with the current directory set to its containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -import sys, os - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.append(os.path.abspath('.')) - -# -- General configuration ----------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be extensions -# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.pngmath'] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The suffix of source filenames. -source_suffix = '.rst' - -# The encoding of source files. -#source_encoding = 'utf-8' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. -project = u'ChIPSeqUtil' -copyright = u'2011, Adam Labadorf' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -version = '1.5' -# The full version, including alpha/beta/rc tags. -release = '1.5' - -# The language for content autogenerated by Sphinx. 
Refer to documentation -# for a list of supported languages. -#language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -#today = '' -# Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' - -# List of documents that shouldn't be included in the build. -#unused_docs = [] - -# List of directories, relative to source directory, that shouldn't be searched -# for source files. -exclude_trees = [] - -# The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -#add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -#show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] - - -# -- Options for HTML output --------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. Major themes that come with -# Sphinx are currently 'default' and 'sphinxdoc'. -html_theme = 'default' - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -#html_theme_options = {} - -# Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] - -# The name for this set of Sphinx documents. If None, it defaults to -# "<project> v<release> documentation". -#html_title = None - -# A shorter title for the navigation bar. Default is the same as html_title. 
-#html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -#html_logo = None - -# The name of an image file (within the static path) to use as favicon of the -# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -#html_favicon = None - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] - -# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, -# using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -#html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -#html_sidebars = {} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -#html_additional_pages = {} - -# If false, no module index is generated. -#html_use_modindex = True - -# If false, no index is generated. -#html_use_index = True - -# If true, the index is split into individual pages for each letter. -#html_split_index = False - -# If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a <link> tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -#html_use_opensearch = '' - -# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = '' - -# Output file base name for HTML help builder. -htmlhelp_basename = 'ChIPSeqUtildoc' - - -# -- Options for LaTeX output -------------------------------------------------- - -# The paper size ('letter' or 'a4'). 
-#latex_paper_size = 'letter' - -# The font size ('10pt', '11pt' or '12pt'). -#latex_font_size = '10pt' - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, author, documentclass [howto/manual]). -latex_documents = [ - ('index', 'ChIPSeqUtil.tex', u'ChIPSeqUtil Documentation', - u'Adam Labadorf', 'manual'), -] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -#latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -#latex_use_parts = False - -# Additional stuff for the LaTeX preamble. -#latex_preamble = '' - -# Documents to append as an appendix to all manuals. -#latex_appendices = [] - -# If false, no module index is generated. -#latex_use_modindex = True - - -# Example configuration for intersphinx: refer to the Python standard library. -intersphinx_mapping = {'http://docs.python.org/': None}
--- a/chipsequtil-master/docs/source/index.rst Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,51 +0,0 @@ -.. ChIPSeqUtil documentation master file, created by - sphinx-quickstart on Mon Oct 31 13:12:52 2011. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -Welcome to ChIPSeqUtil's documentation! -======================================= - -ChIPSeqUtil is a python module and accompanying set of scripts used in the -analysis of ChIPSeq short read data. It is designed as a 'push-button' solution -that is easy for non-linux-experts to use but is flexible and extensible enough -to accomodate special cases when they inevitably arise. The default pipeline -performs the following analysis steps: - -1. runs a peak caller (MACS by default) -2. optionally creates and stages bigwig files for viewing on UCSC Genome Browser -3. filters peaks based on confidence criteria (e.g. p-value) -4. maps peaks to genes using UCSC knownGene annotations -5. performs hypothesis-based motif analysis using TRANSFAC motifs -6. builds a web page consolidating results - -ChIPSeqUtil has the following dependencies: - - - MACS (or some other peaks caller) - - TAMO - - reStUtil - - pypeline - - bx python - -.. note:: add links to these bullets - -ChIPSeqUtil has only been tested on ubuntu-based linux distributions and no -certification is made for other OSes. That being said, some/all of it may -still work. - -Contents: - -.. toctree:: - :maxdepth: 2 - - quick_start - script_reference - module_reference - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` -
--- a/chipsequtil-master/docs/source/module_reference.rst Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,11 +0,0 @@ - -Module Reference -================ - -The module documentation of the chipsequtil python package is here. - -.. toctree:: - - module_src/chipsequtil - module_src/nib - module_src/seq
--- a/chipsequtil-master/docs/source/module_src/chipsequtil.rst Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ - -chipsequtil -=========== - -Contents --------- - -.. toctree:: - - file_wrappers - org_settings - - -.. automodule:: chipsequtil - :members: - :undoc-members: - -Miscellaneous Functions ------------------------ - -.. autofunction:: get_file_parts -.. autofunction:: parse_number -.. autofunction:: gerald_to_bed -.. autofunction:: reverse_complement -.. autofunction:: get_gc_content -.. autofunction:: get_gc_content_distribution -.. autofunction:: get_size_distribution
--- a/chipsequtil-master/docs/source/module_src/file_wrappers.rst Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ - -File Wrappers -============= - -.. module:: chipsequtil - -.. autoclass:: SmartFileIter - :members: - -SmartFileIter-based classes ---------------------------- - -.. autoclass:: BEDFile -.. autoclass:: GPSFile -.. autoclass:: MACSFile -.. autoclass:: KnownGeneFile - -Other wrappers --------------- - -Not all of the file wrappers in this package have been converted to SmartFileIters -yet, these work but are less robust. - -.. autoclass:: AffyBiocFile -.. autoclass:: GERALDOutput - :members: -.. autoclass:: RefGeneFile -
--- a/chipsequtil-master/docs/source/module_src/motiftools.rst Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,56 +0,0 @@ - -Motif Classes and Functions -=========================== - -This module is essentially a copy of TAMO.MotifTools, moved into chipsequtil -for strategic sheep purposes. - -.. automodule:: chipsequtil.motiftools - -The Motif Class ---------------- - -.. autoclass:: Motif - :members: - -Functions ---------- - -.. .. autofunction:: revcomplement -.. autofunction:: Motif_from_ll -.. autofunction:: Motif_from_counts -.. autofunction:: Motif_from_text -.. autofunction:: copy -.. .. autofunction:: minwindowdiff -.. .. autofunction:: minaligndiff -.. autofunction:: diff -.. autofunction:: maskdiff -.. autofunction:: infomaskdiff -.. autofunction:: diverge -.. autofunction:: bestseqs -.. autofunction:: seqs2fasta -.. autofunction:: top_nmers -.. autofunction:: m_matches -.. autofunction:: compare_seqs -.. autofunction:: shuffle_bases -.. autofunction:: random_diff_avestd -.. autofunction:: random_motif -.. autofunction:: toDict -.. autofunction:: toDictVect -.. autofunction:: submotif -.. autofunction:: shuffledP -.. autofunction:: revcompmotif -.. autofunction:: sum -.. autofunction:: giflogo -.. autofunction:: seqlogo -.. autofunction:: merge -.. autofunction:: avestd -.. autofunction:: load -.. autofunction:: save_motifs -.. autofunction:: print_motif -.. autofunction:: print_motifs -.. autofunction:: nlog10 -.. autofunction:: txt2motifs -.. autofunction:: pickletxt2motifs -.. autofunction:: sortby -.. .. autoclass:: MotifToolsException
--- a/chipsequtil-master/docs/source/module_src/nib.rst Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,36 +0,0 @@ - -.. module:: chipsequtil.nib - -nibFrag API -=========== - -These functions and classes are a native python implementation of Jim Kent's nibFrag -utility and file format. The scripts and classes read *.nib* files and can -extract sequences from them as fast or faster than the standalone tools, and -also make sequence data accessible and efficient from within python scripts. -There is no provided utility to create *.nib* files, the original source scripts -must be used and are not provided in this distribution. They might be found on -`Jim Kent's homepage <http://users.soe.ucsc.edu/~kent/>`_. - - -The NibDB Class ---------------- - -.. autoclass:: NibDB - :members: - -Functions ---------- - -Most of these functions should not be used directly, rather they are called -by the NibDB class and implement the gritty details of reading *.nib* files. -Use the NibDB class instead unless you know what you're doing. - - -.. autofunction:: get_nib -.. autofunction:: get_nib_batch -.. autofunction:: get_nib_seq -.. autofunction:: get_nib_header -.. autofunction:: get_nib_header_batch -.. autofunction:: validate_nib_file -.. autofunction:: get_nib_seq_batch
--- a/chipsequtil-master/docs/source/module_src/org_settings.rst Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,91 +0,0 @@ - -The `org_settings` System -========================= - -Many scripts in this package require a number of different source files that all -correspond to a single reference genome (*e.g.* mm9). The `org_settings` set of -functions and *org_settings.py* script consolidates sets of paths/variables that -correspond to different references to be bundled together in a customizable, -accessible way. The bundles are configured as a package-wide settings on install -and alternatively by a user-specific configuration file. The format of the file -follows the conventions in `configparser`_. - -.. _configparser: http://docs.python.org/library/configparser.html - -Reference genomes are specified in a configuration file as follows:: - - [mm9] - description=UCSC mm9 (July '07 build) with full TRANSFAC hypothesis set - genome=mm9 - genome_dir=/nfs/genomes/mouse_gp_jul_07 - genome_size=2107000000 - ucsc_chrom_sizes=%(genome_dir)s/%(genome)s.chrom.sizes - annotation_path=%(genome_dir)s/anno/refFlat-%(genome)s.txt - refgene_anno_path=%(genome_dir)s/anno/refFlat-%(genome)s.txt - known_gene_anno_path=%(genome_dir)s/anno/knownGene-%(genome)s.txt - known_gene_xref_path=%(genome_dir)s/anno/kgXref-%(genome)s.txt - affy_to_known_path=%(genome_dir)s/anno/knownToMOE43-%(genome)s.txt - theme_hypotheses=/nfs/vendata/cwng/TRANSFAC/2010_transfac_vert_all_filtic9.tamo - theme_markov=/nfs/data/cwng/chipseq/hypotheses/Mouse.markov - -This will make **mm9** available as an organism reference to the `org_settings` -functions. The *ucsc_chrom_sizes*, *annotation_path*, *refgene_anno_path*, -*known_gene_anno_path*, *known_gene_xref_path*, and *affy_to_known_path* are -files downloaded from http://hgdownload.cse.ucsc.edu/downloads.html organims -annotation databases. 
The fields in the above example are all required for the -package to work properly - however, any additional variables may be added as -desired. - -API Functions -------------- - -.. module:: chipsequtil - -.. autofunction:: get_org_settings -.. autofunction:: get_all_settings -.. autofunction:: get_global_settings -.. autofunction:: get_local_settings -.. autofunction:: check_org_settings - -The *org_settings.py* script ----------------------------- - -The script *org_settings.py* is a command line interface into the `org_settings` -system. It has the following usage:: - - $> org_settings.py -h - Usage: org_settings.py [options] [<org key> [<org setting>]] - - Tool for retrieving sets of organism-specific settings and paths. Original - paths are set at install time, and can be overridden in the file ~/.org - settings.cfg. Allows output of settings in a variety of shell environment - syntaxes. The tool attempts to guess which shell environment is being used by - examining the SHELL environment variable unless explicitly set. When run - without an argument, returns a listing of all settings available. - - Options: - -h, --help show this help message and exit - -s SYNTAX, --syntax=SYNTAX - syntax flavor of output to produce - [default: %auto] - -l, --list print all available settings for - human consumption - $> org_settings.py -s bash mm9 genome_dir - /nfs/genomes/mouse_gp_jul_07 - $> - -If you use bash as your shell, you can use shell expansion to conveniently build -commands such as the following:: - - $> map_peaks_to_known_genes.py $(org_settings.py mm9 known_gene_anno_path) \ - $(org_settings.py mm9 known_gene_xref_path) macs_peaks.xls - -Installing ----------- - -The file *org_settings.cfg* exists in the root directory of the source distribution. -This file should be modified and then copied into the *src/chipsequtil/* directory -before installation for org settings that should be available on the system as a -whole. 
Alternatively, users may create the file *.org_settings.cfg* in their home -directories and add sections like the one above so they may customize their own -sets of variables.
--- a/chipsequtil-master/docs/source/module_src/seq.rst Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,34 +0,0 @@ - -.. module:: chipsequtil.seq - -Sequence data functions and classes -=================================== - -This module has simple methods for reading in FASTA and FASTQ formatted files. -*fasta_itr* and *fastq_itr* should be used when it is unnecessary or undesired -to have all sequences loaded into memory. *FASTAFile* and *FASTQFile* classes -store all sequence information in memory, but allow efficient dictionary-style -random access to sequences and quality scores as well as repeated whole-file -iteration. - -Functions ---------- - -.. autofunction:: fasta_itr -.. autofunction:: fasta_to_dict -.. autofunction:: write_fasta_to_file - -.. autofunction:: fastq_itr -.. autofunction:: fastq_to_dict -.. autofunction:: write_fastq_to_file - -Classes -------- - -.. autoclass:: FASTAFile - :members: - -.. autoclass:: FASTQFile - :members: - -
--- a/chipsequtil-master/docs/source/module_src/util.rst Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ - -Utility functions and classes -============================= -
--- a/chipsequtil-master/docs/source/quick_start.rst Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ - -Quick Start Documentation -========================= - -
--- a/chipsequtil-master/docs/source/script_reference.rst Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,892 +0,0 @@ -Illumina pipeline script reference -================================== - -The following is the output of the scripts provided by this package when invoked -on the command line with *-h*. - -.. _top: - -Scripts: - - :ref:`chipseq_pipeline.py <chipseq_pipeline.py>` - - :ref:`create_pipeline_script.py <create_pipeline_script.py>` - - :ref:`extract_promoters.py <extract_promoters.py>` - - :ref:`filter_bed_by_position_count.py <filter_bed_by_position_count.py>` - - :ref:`filter_macs_peaks.py <filter_macs_peaks.py>` - - :ref:`filter_gps_peaks.py <filter_gps_peaks.py>` - - :ref:`filter_mapped_known_genes.py <filter_mapped_known_genes.py>` - - :ref:`gerald_stats.py <gerald_stats.py>` - - :ref:`gerald_to_bed.py <gerald_to_bed.py>` - - :ref:`join_mapped_known_genes.py <join_mapped_known_genes.py>` - - :ref:`map_intervals.py <map_intervals.py>` - - :ref:`map_peaks_to_genes.py <map_peaks_to_genes.py>` - - :ref:`map_peaks_to_known_genes.py <map_peaks_to_known_genes.py>` - - :ref:`motif_scan.py <motif_scan.py>` - - :ref:`nibFrag.py <nibFrag.py>` - - :ref:`org_settings.py <org_settings.py>` - - :ref:`peaks_to_fasta.py <peaks_to_fasta.py>` - - :ref:`plot_pos_vs_neg_peaks.py <plot_pos_vs_neg_peaks.py>` - - :ref:`plot_peak_loc_dist.py <plot_peak_loc_dist.py>` - - :ref:`rejection_sample_fasta.py <rejection_sample_fasta.py>` - - :ref:`sort_bed.py <sort_bed.py>` - - :ref:`wait_for_jobid.py <wait_for_jobid.py>` - - :ref:`wqsub.py <wqsub.py>` - - :ref:`wqsub_drmaa.py <wqsub_drmaa.py>` - - -.. 
_chipseq_pipeline.py: - -chipseq_pipeline.py:: - - Usage: chipseq_pipeline.py [options] <organism> <experiment alignment filename> [<control alignment filename>] - - 1st generation ChIPSeq analysis pipeline: - - - runs MACS to find peaks and sorts peaks by p-value - - sorts peaks by pvalue and isolates top *n* - - maps peaks to genes - - extracts fasta files for gene peaks in experiments - - constructs background sequences matching foreground distribution - - runs THEME.py on input sequences w/ refinement - - builds an infosite with stats from this analysis - - Control input file is optional. *organism* argument is passed to the - *org_settings.py* command to specify organism specific parameters, ensure - that the following commands return valid paths: - - If running MACS: - - org_settings.py <organism> genome_size - - org_settings.py <organism> genome_dir - - org_settings.py <organsim> refgene_anno_path - - If running THEME: - - org_settings.py <organism> theme_hypotheses - - org_settings.py <organism> theme_markov - - - - Options: - -h, --help show this help message and exit - --auto run all steps non-interactively (for batch mode, e.g.) 
- --steplist=STEPLIST with --auto, run specific steps - --exp-name=EXP_NAME name for the experiment/pipeline, used for convenience - [default: current directory name] - --bed-args=BED_ARGS double quote wrapped arguments for gerald_to_bed.py - [default: --stdout --chromo-strip=.fa] - --macs-exec=MACS_EXEC - the executable to use for MACS, if not an absolute - path it needs to be on your shell environment path - [default: macs14] - --macs-args=MACS_ARGS - double quote wrapped arguments for macs, only changing - --mfold, --tsize, --bw, and --pvalue recommended - [default: --pvalue=1e-5] - --map-args=MAP_ARGS double quote wrapped arguments for mapping peaks to - genes [default: --tss --upstream-window=10000 - --downstream-window=10000] - --filter-peaks-args=FILTER_PEAKS_ARGS - double quote wrapped arguments for - filter_macs_peaks.py [default: --sort-by=pvalue - --top=1000 -f 'tags>20'] - --filter-neg-peaks-args=FILTER_NEG_PEAKS_ARGS - double quote wrapped arguments for - filter_macs_peaks.py applied to negative peaks - [default: -f 'tags>20'] - --peaks-to-fa-args=PEAKS_TO_FA_ARGS - double quote wrapped arguments for peaks_to_fasta.py - [default: --fixed-peak-width=200] - --bg-exec=BG_EXEC the executable to use for generating background - sequences for THEME, if not an absolute path it needs - to be on your shell environment path [default: - rejection_sample_fasta.py] - --bg-args=BG_ARGS double quote wrapped arguments for background sequence - generation utility [default: --num-seq=2.1x] - --theme-args=THEME_ARGS - double quote wrapped arguments for THEME.py [default: - --beta=0.7 --cv=5 --trials=25] - --motif-pval-cutoff=MOTIF_PVAL - the p-value cutoff for sending non-refined enrichmed - motifs to THEME for refinement - --parallelize parallelize portions of the pipeline using qsub, only - works from SGE execution hosts - --ucsc perform tasks for automated integration with UCSC - genome browser [default:False] - --build-infosite-args=INFOSITE_ARGS - arguments to pass 
to build_chipseq_infosite.py - [default: None] - - UCSC Integration Options (with --ucsc): - --stage-dir=STAGE_DIR - root directory where UCSC integration files should be - made available [default: ./] - --stage-url=STAGE_URL - URL where UCSC integration files will be made - available over the web [default: http://localhost/] - - Note: it is advised to leave the --*-args arguments unchanged - unless you really know what you're doing. - - -:ref:`top <top>` - -.. _create_pipeline_script.py: - -create_pipeline_script.py:: - - This is an interactive script that creates an executable script to use - for ChIPSeq analyses. When prompted for experiment and control files, - tab completion is available a la bash or tcsh shells. Press Ctrl-C at - any time to quit. - Usage: create_pipeline_script.py - - Script for creating a custom run script for ChIPSeq/DNAse hypersensitivity - experiments. User is asked for paths and settings required for ChIPSeq - analysis using the *chipseq_pipeline.py* utility and produces an executable - run script with helpful information on how to run it. Also creates a JSON - formatted file containing all the parameters for this pipeline run. - - Options: - -h, --help show this help message and exit - - Note: this script only works in Unix-style environments - - ================= ChIPSeq Experiment Pipeline Script Generator ================= - - -:ref:`top <top>` - -.. _extract_promoters.py: - -extract_promoters.py:: - - Usage: extract_promoters.py [options] <organism> - - Extract the promoter sequences in FASTA format from all genes - or a list of genes specified in an input file. Gene annotation is RefGene - corresponding to the organism passed in, paths returned by: - - $> org_settings.py <organism> refgene_anno_path - $> org_settings.py <organism> genome_dir - - must be valid. 
- - Options: - -h, --help show this help message and exit - -u UPSTREAM, --upstream=UPSTREAM - upstream window from TSS to extract [default: 3000] - -d DOWNSTREAM, --downstream=DOWNSTREAM - downstream window from TSS to extract [default: 1000] - -l GENE_LIST, --gene-list=GENE_LIST - file containing a list of gene identifiers to extract, - one per line [default: none] - -t GENE_TYPE, --gene-type=GENE_TYPE - type of gene identifier in gene list, choose from - ['symbol', 'refgene'] [default: symbol] - -o OUTPUT, --output=OUTPUT - file to write fasta records to [default: stdout] - - -:ref:`top <top>` - -.. _filter_bed_by_position_count.py: - -filter_bed_by_position_count.py:: - - Usage: filter_bed_by_position_count.py [options] <bed file> - - Analyze BED file and filter out alignments above some threshold that align to - a single genomic position. - - Options: - -h, --help show this help message and exit - -n MAX_COUNT, --max-count=MAX_COUNT - max tag count at a given position, filter above - [default: 5] - --output=OUTPUT write output to file - - Note: only works if BED file is sorted! - - -:ref:`top <top>` - -.. _filter_macs_peaks.py: - -filter_macs_peaks.py:: - - Usage: filter_macs_peaks.py [options] <MACS peak file> - - Filter MACS peaks by supplied criteria. Available filter features are: - - length - tags - pvalue - fold_enrichment - fdr - - Filters are provided as expressions using the [-f |--filter] option, e.g. the - command - - filter_macs_peaks.py -f "tags>100" --filter="pvalue<=1e-9" - --filter="100<length<=200" <MACS peak file> - - finds only peaks with more than 100 tags, a pvalue of less than 1e9, and a - length between 100, exclusive, and 200, inclusive. Any number of filters may - be provided, and only peaks that match *all* filters pass. User is warned if - filters result in zero results. Only inequality operators are valid. - Invoking with no filter arguments returns all peaks. To sort, use the --sort- - by option, e.g. 
- - filter_macs_peaks.py -f "pvalue<=1e-9" --sort-by=pvalue <MACS peak file> - - sorts peaks with a pvalue smaller than 1e-9 with the smallest pvalue peaks. - All fields are sorted ascending by default. Output is prepended with comments - describing what the file contains, i.e. which filters are applied, how many - records there are, etc. - - Note: MACS -10*log10(pvalue) values are converted to normal pvalues - - - Options: - -h, --help show this help message and exit - -f FILTERS, --filter=FILTERS - add filter expression - --sort-by=SORT_BY comma delimited list of features to sort by, filtered - peaks are not sorted by default, if provided peaks are - sorted ascending by default - --sort-dir=SORT_DIR direction to sort [default: ASCEND] - --top=TOP accepts an integer, output at most this many peaks - [default: all] - --output=OUTPUT filename to output filtered peaks to [default: stdout] - --encode-filters write out records to a file <MACS peaks - file>_<filters>.xls (incompatible with --output - option) - --summary only print out summary information for the filter - --no-header do not print out header or metadata info - --shuffle shuffle order of filtered records, useful for - selecting random peaks - --print-encoded-fn print out the filename that would be created by - --encode-filters - - -:ref:`top <top>` - -.. _filter_gps_peaks.py: - -filter_gps_peaks.py:: - - Usage: filter_gps_peaks.py [options] <GPS peak file> - - Filter GPS peaks by supplied criteria. Available filter features are: - - IP - Control - Fold - qvalue - pvalue - IPvsEMP - IPvsCTR - - Filters are provided as expressions using the [-f |--filter] option, e.g. the - command - - filter_gps_peaks.py -f "IP>100" --filter="pvalue<=1e-9" <GPS peak file> - - finds only peaks with more than 100 tags and a pvalue of less than 1e9. Any - number of filters may be provided, and only peaks that match *all* filters - pass. User is warned if filters result in zero results. Only inequality - operators are valid. 
Invoking with no filter arguments returns all peaks. To - sort, use the --sort-by option, e.g. - - filter_gps_peaks.py -f "pvalue<=1e-9" --sort-by=pvalue <GPS peak file> - - sorts peaks with a pvalue smaller than 1e-9 with the smallest pvalue peaks. - All fields are sorted ascending by default. Output is prepended with comments - describing what the file contains, i.e. which filters are applied, how many - records there are, etc. - - Note: GPS P_-log10 and Q_-log10 values are converted to normal pvalues and - qvalues - - - Options: - -h, --help show this help message and exit - -f FILTERS, --filter=FILTERS - add filter expression - --sort-by=SORT_BY comma delimited list of features to sort by, filtered - peaks are not sorted by default, if provided peaks are - sorted ascending by default - --sort-dir=SORT_DIR direction to sort [default: ASCEND] - --top=TOP accepts an integer, output at most this many peaks - [default: all] - --output=OUTPUT filename to output filtered peaks to [default: stdout] - --encode-filters write out records to a file <GPS peaks - file>_<filters>.xls (incompatible with --output - option) - --summary only print out summary information for the filter - --no-header do not print out header or metadata info - --shuffle shuffle order of filtered records, useful for - selecting random peaks - --print-encoded-fn print out the filename that would be created by - --encode-filters - - -:ref:`top <top>` - -.. _filter_mapped_known_genes.py: - -filter_mapped_known_genes.py:: - - Usage: filter_mapped_known_genes.py [options] <mapped known genes file> - - Filter columns and rows from *join_mapped_known_genes.py* output which was - invoked with *--binary-plus* and *--field-types* flags. Specify full column - names for either binding or expression data with the *--bind-cols* and - *--affy-cols* arguments, respectively. The special fieldname *MAPPED* from - *join_mapped_known_genes.py* is used to determine whether a file contains a - mapping for each gene. 
To filter genes by their associated binding or - expression data, specify *--bind-filter* or *--affy-filter* as follows: - - - *any* - report gene if at least one input file maps to the gene - - *all* - report gene if every input file maps to the gene - - *absent* - report gene if no input file maps to the gene - - *none* - do not filter genes at all (default) - - Results of binding and expression filters are 'and'ed together, e.g.: - - --bind-filter=all --affy-filter=absent - - returns only genes for which all binding files and none of the expression - files map. - - - Options: - -h, --help show this help message and exit - --bind-cols=BIND_COLS - comma delimited list of binding data column names to - include, [default: all] - --affy-cols=AFFY_COLS - comma delimited list of expression data column names - to include, [default: all] - --bind-filter=BIND_FILT - gene set to include based on binding data [default: - none] - --affy-filter=AFFY_FILT - gene set to include based on expression data [default: - none] - --output=OUTPUT write output to file - - Note: when specifying column names, be sure to escape characters like - (,),&,*,etc... that shells interpret with a \, e.g. --bind- - cols=-10\*log10\(pvalue\) - - -:ref:`top <top>` - -.. _gerald_stats.py: - -gerald_stats.py:: - - Usage: gerald_stats.py [options] <filename> [<filename>...] - - Outputs various stats about the GERALD formatted file(s) input. If multiple - files are provided statistics are aggregated according to the specified output - format. Output formats available via --format=X : - - # *python* - print an eval()'able python dictionary w/ counts - # *rst* - print statistics in a reStructured text table (default) - # *tab* - print statistics in a tab delimited form w/ header names - - Except for *python* format, each input file has its own output line. *python* - summarizes all alignments. 
- - - Options: - -h, --help show this help message and exit - --output=OUTPUT write output to file [default: stdout] - --format=FORMAT format to print out stats [default: rst] - - -:ref:`top <top>` - -.. _gerald_to_bed.py: - -gerald_to_bed.py:: - - Usage: gerald_to_bed.py [options] <GERALD file> [<GERALD file>...] - - Convert the GERALD alignment formatted files into BED format. Input file - named <path>/<filename>.<ext> is translated into <path>/<filename>.bed unless - --output or --stdout is specified, in which case formatted lines are written - to file or standard output, respectively. If multiple input files are - supplied with the --output or --stdout option all formatted lines are - concatenated together. Formatting only occurs for GERALD input lines that have - a valid Match Position field (i.e. successfully aligned somewhere). - - Options: - -h, --help show this help message and exit - --output=OUTPUT write all records to file - --stdout write out all formatted lines to stdout - --min-fields only format the first three fields - --pass-only only format lines with Y in the Pass Filtering field - --chromo-strip=CHROMO_STRIP - pattern to remove from chromo field in BED output - (e.g. --chromo-strip=.fa to remve .fa from chrX.fa) - [default: .fa] - - -:ref:`top <top>` - -.. _join_mapped_known_genes.py: - -join_mapped_known_genes.py:: - - Usage: join_mapped_known_genes.py -b <mapped DNA binding file>|-a <mapped microarray file> [-b <mapped DNA binding file> ...] [-a <mapped microarray file> ...] - - Join all files on the first column, concatenating records with matching - entries onto one line per entry. Understands DNA binding data as mapped with - *map_peaks_to_known_genes.py* utility microarray data as mapped by - *probeset_to_known_genes.py* utility, passed to program using *-b* and *-a* - options respectively. If a file contains more than one mapping to a gene - additional columns are added. At least one file of either type is required. 
- Field names are written as <filename>.<original field name>.<map number> - - Options: - -h, --help show this help message and exit - -a AFFY_FILE, --affy-file=AFFY_FILE - add a mapped microarray file - -b BIND_FILE, --bind-file=BIND_FILE - add a mapped DNA binding file (e.g. MACS, BED) - -m MACS_FILE, --macs-file=MACS_FILE - DEPRECATED: use -b instead, add a mapped default MACS - formatted peaks (*.xls) file - --output=OUTPUT file to output joined records to [default: stdout] - --first-only only output the first mapping to a gene from each file - --binary output only one column per file with a 0 or 1 to - indicate whether a mapping exists in that file - --binary-plus output one column per file with a 0 or 1 to indicate - whether a mapping exists in that file in addition to - all other columns - --field-types prepend BIND or AFFY to the beginning of all - appropriate columns - - Note: microarray files should have been created by bioconductor, and all files - should have a row of fieldnames as the first line - - -:ref:`top <top>` - -.. _map_intervals.py: - -map_intervals.py:: - - Usage: map_intervals.py [options] <from> <to> - - Find records in <to> interval file that map to records in <from> interval - file. Files should be tab delimited and are expected to have a chromosome - column, a start column, and an end column. The indices of these columns can - be specified on the command line but by default are the first three columns, - respectively. Prints out to stdout by default one new line separated row per - row in <from> with a line from <to> where there is a mapping. If no mapping is - found (e.g. when specifying a maximum margin to search within) the word None - is printed. By default only prints nearest record, with ties settled by - smallest line number in <to>. 
- - Options: - -h, --help show this help message and exit - -w WINDOW, --window=WINDOW - window as <int upstream> <int downstream> to search - for intervals [default: (1000000000.0, 1000000000.0)] - -f FROM_IND, --from=FROM_IND - coordinates of chromosome, start, stop in <from> file - -i, --skip-from-header - <from> has a header that should be skipped - -t TO_IND, --to=TO_IND - coordinates of chromosome, start, stop in <to> file - -j, --skip-to-header <to> has a header that should be skipped - - -:ref:`top <top>` - -.. _map_peaks_to_genes.py: - -map_peaks_to_genes.py:: - - Usage: map_peaks_to_genes.py [options] <refGene file> <peaks file> - - Map the peaks in <peaks file> to genes in <refGene file>. <refGene file> is - format is as specified in - http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/refGene.sql. <peaks - file> format is as produced by MACS. - - Options: - -h, --help show this help message and exit - --upstream-window=UPST_WIN - window width in base pairs to consider promoter region - [default: 5500] - --downstream-window=DNST_WIN - window width in base pairs to consider downstream - region [default: 2500] - --map-output=PEAK_OUTPUT - filename to output mapped peaks in BED format to - [default: stdout] - --stats-output=STATS_OUTPUT - filename to output summary stats in conversion - [default: stderr] - --peaks-format=PEAKS_FMT - format of peaks input file [default: MACS] - - -:ref:`top <top>` - -.. _map_peaks_to_known_genes.py: - -map_peaks_to_known_genes.py:: - - Usage: map_peaks_to_known_genes.py [options] <knownGene file> <knownGene xRef file> <peaks file> - - - Map the peaks in <peaks file> to genes in <knownGene file>. <knownGene file> - isformat is as specified in - http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/knownGene.sql.<peaks - file> format is as produced by MACS. If *auto* is chosen (default) file - extension is examined for *.xls* for default MACS format or *.bed* for BED - format. 
If the --detailoption is provided, the following extra fields are - appended to each row: - - peak loc, dist from feature, score, map type, map subtype - - - Options: - -h, --help show this help message and exit - --upstream-window=UPST_WIN - window width in base pairs to consider promoter region - [default: 5500] - --downstream-window=DNST_WIN - window width in base pairs to consider downstream - region [default: 2500] - --tss calculate downstream window from transcription start - site instead of transcription end site - --map-output=PEAK_OUTPUT - filename to output mapped peaks to [default: stdout] - --stats-output=STATS_OUTPUT - filename to output summary stats in conversion - [default: stderr] - --peaks-format=PEAKS_FMT - format of peaks input file [default: auto] - --detail add extra fields to output, see description - --intergenic write intergenic peaks to the gene file as well with - None as gene ID - - -:ref:`top <top>` - -.. _motif_scan.py: - -motif_scan.py:: - - Usage: motif_scan.py [options] <org> <peaks fn> <TAMO motif fn> - - Do some motif scanning stuffs - - Options: - -h, --help show this help message and exit - -n TOP_N, --top-n=TOP_N - use top n peaks by pvalue for sequence scanning - [default: all] - -i MOTIF_IND, --motif-indices=MOTIF_IND - which indices from <TAMO motif fn> to use [default: - all] - -d DIR, --dir=DIR write all results into this directory - --fixed-peak-width=FIXED_W - use only a fixed peak window around the summit instead - of whole peak - - -:ref:`top <top>` - -.. _nibFrag.py: - -nibFrag.py:: - - Usage: nibFrag.py [options] file.nib start end strand [outfile] - -- or -- - nibFrag.py [options] --batch file.nib batchfile [batchfile ...] - - A python implementation of Jim Kent's nibFrag utility that allows outputting - to stdout. Otherwise the functionality is identical for the non-batch usage. - Batch mode accepts one or more files containing sets of coordinates to extract - from the nib file. 
Only BED formatting is accepted at the moment. All - sequences are concatenated together in FASTA format. To retrieve the entire - sequence, use END as the end argument. - - Options: - -h, --help show this help message and exit - --no-header only output sequence (no fasta header) - --wrap-width=WRAP_WIDTH - wrap output sequence at this number of bases, 0 - indicates no wrap (sequence ends up on single line) - [default: 50] - --batch run in batch mode, interpret arguments after nib file - as queries - --batch-format=BATCH_FORMAT - format to interpret batch files [default: BED] - - Original nibFrag options: - --masked use lower case characters for bases meant to be masked - out - --hardMasked use upper case for non masked-out and 'N' characters - for masked-out bases - --upper use upper case characters for all bases - --name=NAME Use given name after '>' in output sequence - --dbHeader=DBHEADER - Add full database info to the header, with or without - -name option - --tbaHeader=TBAHEADER - Format header for compatibility with tba, takes - database name as argument - - Note: When specifying --name optionin batch mode, also specify --dbHeader to - ensure unique FASTA headers. - - -:ref:`top <top>` - -.. _org_settings.py: - -org_settings.py:: - - Usage: org_settings.py [options] [<org key> [<org setting>]] - - Tool for retrieving sets of organism-specific settings and paths. Original - paths are set at install time, and can be overridden in the file ~/.org - settings.cfg. Allows output of settings in a variety of shell environment - syntaxes. The tool attempts to guess which shell environment is being used by - examining the SHELL environment variable unless explicitly set. When run - without an argument, returns a listing of all settings available. 
- - Options: - -h, --help show this help message and exit - -s SYNTAX, --syntax=SYNTAX - syntax flavor of output to produce - [default: %auto] - -l, --list print all available settings for - human consumption - - -:ref:`top <top>` - -.. _peaks_to_fasta.py: - -peaks_to_fasta.py:: - - Usage: peaks_to_fasta.py [options] <organism> <peak file> [<peak file> ...] - - Extract sequences for peaks in provided peak file(s). Can interpret MACS or - BED output, determined automatically by .xls or .bed extensions respectively - (force explicit format with --peak-format option). Outputs fasta sequences - for the peaks in all files extracted from the reference genome specified by - the output of *org_settings.py <organism> genome_dir* to stdout by - default.Chromosome names in peak files must match nib filenames without - extension (e.g. peak line: chr1 0 100 searches *genome_dir*/chr1.nib). Fasta - records have the following format: - - ><chromosome>:<start>-<end>;fn=<name of file>:<line number>;db_fn=<db - filename>;fmt=<format>;<source alignment info> - <sequence...> - - <db filename> is the filename where the sequence was extracted, <format> is - the format of the input file (MACS or BED), and <source alignment info> - contains all the fields from the originating alignment according to the source - format. - - Options: - -h, --help show this help message and exit - --min-header only store <chromosome>:<start>-<end> in header - --peak-format=PEAK_FORMAT - peak file format, 'auto' determines format by - extension, choices: MACS, BED, auto [default: auto] - --output=OUTPUT filename to output fasta records to [default: stdout] - --fixed-peak-width=FIXED_PEAK_WIDTH - return a fixed number of bases flanking peak summit - (*summit* field in MACS, (end-start)/2 in BED), - ignoring start/stop coords [default: None] - --wrap-width=WRAP_WIDTH - wrap fasta sequences to specified width. -1 indicates - no wrap [default: 70] - - -:ref:`top <top>` - -.. 
_plot_pos_vs_neg_peaks.py: - -plot_pos_vs_neg_peaks.py:: - - Usage: plot_pos_vs_neg_peaks.py [options] <pos peaks fn> <neg peaks fn> - - Options: - -h, --help show this help message and exit - -o OUT_FN, --output=OUT_FN - filename of output image - - -:ref:`top <top>` - -.. _plot_peak_loc_dist.py: - -plot_peak_loc_dist.py:: - - Usage: plot_peak_loc_dist.py [options] <peaks fn> <gene list fn> - - Produce a pie chart of the locations of peaks in different bins (promoter, - gene, exon, intron, etc.) and, optionally, save the different records to their - own files for subsequent analysis. Also produce a histogram of distance from - feature values in mapping file. Peaks file is expected to be as output by - MACS, or alternately as a BED file but then the -b plot is not available. - Gene list file is expected to be in the format as output by - peaks_to_known_genes.py script. - - Options: - -h, --help show this help message and exit - -b BAR_FN, --bar-fn=BAR_FN - filename for pvalue stacked bar chart - -g GENE_PIE_FN, --gene-pie-fn=GENE_PIE_FN - filename for pie chart image - -p PEAK_PIE_FN, --peak-pie-fn=PEAK_PIE_FN - filename for pie chart image - -f DIST_FN, --dist-fn=DIST_FN - filename for distance from feature image - -s, --save write out files containing peaks for each category - -d OUT_DIR, --output-dir=OUT_DIR - output files created by --save option to this - directory - --no-plot dont show (but save) the figure produced - --peaks-format=PEAK_FMT - format of peaks file, either MACS or BED [default: - MACS] - - -:ref:`top <top>` - -.. _rejection_sample_fasta.py: - -rejection_sample_fasta.py:: - - Usage: rejection_sample_fasta.py [options] <organism> <fasta file> [<fasta file> ... ] - - Use rejection sampling to generate a set of background/random - sequences matching the distance to nearest transcription start site, sequence - length, and GC content distributions of the input fasta file(s). 
Generated - sequences are genomic sequences sampled based on these distributions. All - sequences - from all files are used to generate the background sequences. The following - command must output a path to a nib genomic sequence directory and refGene - annotation, respectively : - - $> org_settings.py <organism> genome_dir - $> org_settings.py <organism> refgene_anno_path - - Utility prints out generated fasta records to stdout by default. Input - sequences - from chr20 are mapped to chrX, chr21 are mapped to chrY, and sequences from - chrM - are not used. - - - Options: - -h, --help show this help message and exit - -n NUM_SEQS, --num-seqs=NUM_SEQS - number of sequences to generate, either absolute - number or factor of # input sequences, e.g. 2.5x for - 2.5 times the # of input sequences [default: 1x] - --output=OUTPUT file to output fasta records to [default: stdout] - --bed also produce a BED formatted file representing sampled - sequences - --bed-output=BED_OUTPUT - with --bed, file to output BED records to [default: - output.bed] - -v, --verbose print out debug information - - -:ref:`top <top>` - -.. _sort_bed.py: - -sort_bed.py:: - - Usage: sort_bed.py [options] <BED file> [<BED file> <BED file>...] - - Sort the BED formatted files first by chromosome (field 1) and then by start - coordinate (field 2). Lines from all files submitted are concatenated and - sorted in the final output. - - Options: - -h, --help show this help message and exit - --output=OUTPUT filename to write the sorted BED lines [default: stdout] - - -:ref:`top <top>` - -.. _wait_for_jobid.py: - -wait_for_jobid.py:: - - Usage: wait_for_jobid.py [options] <job id> [<job id>...] - - Poll qstat and wait until all <job id>s are finished - - Options: - -h, --help show this help message and exit - - -:ref:`top <top>` - -.. _wqsub.py: - -wqsub.py:: - - Usage: [wqsub.py] [options] command - - Wrap the specified command into a qsub script and submit it for execution. 
- Script captures both stdout and stderr to the current directory. By default, - all of the user's environment variables are put into the script (compatible - with SGE only ATM). - - Options: - -h, --help show this help message and exit - --wqsub-name=WQSUB_NAME - job name to submit as <--wqsub-name>_<first non- - whitespace chars in command> [default: wqsub] - --wqsub-ext=WQSUB_EXT - file extension to use for stdout files - --wqsub-keep-script do not delete qsub script generated after job - submission - --wqsub-no-env do not include any local environment variables in the - script - --wqsub-no-submit create script but do not submit job (useful for - generating scripts) - --wqsub-drm=DRM the DRM to generate scripts for [default: SGE] - --wqsub-drm-arg=DRM_ARGS - arguments to pass as parameters in the job script - specific to the DRM, use multiple option flags to - specify multiple parameters - --wqsub-wait poll the DRM and do not return control until job is - finished (only works for TORQUE) - - Note: this script only works in Unix-style environments. - - -:ref:`top <top>` - -.. _wqsub_drmaa.py: - -wqsub_drmaa.py:: - - Traceback (most recent call last): - File "../scripts/wqsub_drmaa.py", line 9, in <module> - import drmaa - ImportError: No module named drmaa - - -:ref:`top <top>` - - -
--- a/chipsequtil-master/examples/mapping/map_to_known_gene.sh Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,44 +0,0 @@ -#!/bin/bash - -# Usage: map_peaks_to_known_genes.py [options] <knownGene file> <knownGene xRef file> <peaks file> -# -# -# Map the peaks in <peaks file> to genes in <knownGene file>. <knownGene file> -# is -# format is as specified in -# http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/knownGene.sql. -# <peaks file> format is as produced by MACS. If *auto* is chosen (default) -# file extension is examined for *.xls* for default MACS format or *.bed* for -# BED format. If the --detailoption is provided, the following extra fields are -# appended to each row: -# -# peak loc, dist from feature, score, map type, map subtype -# -# -# Options: -# -h, --help show this help message and exit -# --upstream-window=UPST_WIN -# window width in base pairs to consider promoter region -# [default: 5500] -# --downstream-window=DNST_WIN -# window width in base pairs to consider downstream -# region [default: 2500] -# --tss calculate downstream window from transcription start -# site instead of transcription end site -# --map-output=PEAK_OUTPUT -# filename to output mapped peaks to [default: stdout] -# --stats-output=STATS_OUTPUT -# filename to output summary stats in conversion -# [default: stderr] -b# --peaks-format=PEAKS_FMT -# format of peaks input file [default: auto] -# --detail add extra fields to output, see description - -ORG=mm9 -KG_FN=$(org_settings.py $ORG known_gene_anno_path) -XREF_FN=$(org_settings.py $ORG known_gene_xref_path) -OPTS="--detail --tss --upstream-window=10000 --downstream-window=10000" -PEAKS_FN=test_peaks.xls - -echo map_peaks_to_known_genes.py $OPTS $KG_FN $XREF_FN $PEAKS_FN -map_peaks_to_known_genes.py $OPTS $KG_FN $XREF_FN $PEAKS_FN \ No newline at end of file
--- a/chipsequtil-master/examples/mapping/test_peaks.xls Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -# genes: -# uc007aet.1 chr1 - 3195984 3205713 3195984 3195984 2 3195984,3203519, 3197398,3205713, uc007aet.1 -# uc008wgw.1 chr5 + 3522764 3525260 3522764 3522764 1 3522764, 3525260, uc008wgw.1 -# -# chr5 3522663 3522664 1 0 1 0 0 1 - promoter -# chr5 3522863 3522864 1 0 1 0 0 1 - in gene -# chr5 3532563 3532564 1 0 1 0 0 1 - in downsteam -# chr1 3205814 3205815 1 0 1 0 0 1 - promoter -# chr1 3205614 3205615 1 0 1 0 0 1 - in gene -# chr1 3195913 3195914 1 0 1 0 0 1 - in downstream -# chr1 319588 319588 1 0 1 0 0 1 - unmapped -# -# chr1 is - strand, chr5 + strand, assumes 10k window around TSS -chr start end length summit tags -10*log10(pvalue) fold_enrichment FDR(%) -chr5 3522663 3522664 1 0 1 0 0 1 -chr5 3522863 3522864 1 0 1 0 0 1 -chr5 3532564 3532565 1 0 1 0 0 1 -chr1 3205814 3205815 1 0 1 0 0 1 -chr1 3205614 3205615 1 0 1 0 0 1 -chr1 3195913 3195914 1 0 1 0 0 1 -chr1 319588 319588 1 0 1 0 0 1
--- a/chipsequtil-master/examples/nib/shuffled_peaks.bed Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1000 +0,0 @@ -chr19 29505473 29505892 MACS_peak_4348 103.85 -chr5 23950711 23951266 MACS_peak_6268 83.33 -chr1 75303135 75303785 MACS_peak_206 88.17 -chr3 105611391 105612033 MACS_peak_5420 56.03 -chr4 140654843 140655635 MACS_peak_6105 178.49 -chr2 37590398 37590707 MACS_peak_4677 75.45 -chr1 107761995 107762362 MACS_peak_312 96.07 -chr3 153387629 153388143 MACS_peak_5657 52.58 -chr11 88165911 88166520 MACS_peak_1474 62.73 -chr11 109512132 109512551 MACS_peak_1616 128.82 -chr18 57085271 57085755 MACS_peak_4115 107.73 -chr13 96661232 96661599 MACS_peak_2313 62.56 -chr3 95164133 95164494 MACS_peak_5342 93.42 -chr3 107434353 107434982 MACS_peak_5438 65.35 -chr11 6525702 6526208 MACS_peak_1057 56.89 -chr17 71137869 71138311 MACS_peak_3922 65.19 -chr5 120915880 120916171 MACS_peak_6566 100.90 -chr14 115241544 115242039 MACS_peak_2840 66.36 -chr3 115548096 115548809 MACS_peak_5466 146.81 -chr3 143368788 143369115 MACS_peak_5597 63.16 -chr12 73861752 73862246 MACS_peak_1870 80.18 -chr4 83619188 83619568 MACS_peak_5815 52.20 -chr7 80763465 80763988 MACS_peak_7410 71.38 -chr11 78816343 78817112 MACS_peak_1360 53.58 -chr10 80160393 80161035 MACS_peak_822 294.44 -chr13 32893584 32894176 MACS_peak_2117 81.21 -chr10 78218410 78218726 MACS_peak_790 64.14 -chr11 58907018 58907334 MACS_peak_1205 98.43 -chr3 104162680 104163086 MACS_peak_5410 55.17 -chr6 39156271 39156786 MACS_peak_6854 61.68 -chr18 85020575 85021002 MACS_peak_4215 74.27 -chr6 72166566 72167067 MACS_peak_6931 69.03 -chr17 56748737 56749331 MACS_peak_3884 106.79 -chr2 57090575 57091032 MACS_peak_4713 76.38 -chr6 52662598 52663126 MACS_peak_6888 97.50 -chr5 88982859 88983700 MACS_peak_6425 295.50 -chr5 134967688 134968192 MACS_peak_6645 72.85 -chr17 29089160 29089657 MACS_peak_3724 82.93 -chr8 123062177 123062589 MACS_peak_8088 58.85 -chr11 85534180 85534673 MACS_peak_1423 87.33 
-chr15 66990142 66990609 MACS_peak_3114 118.53 -chr8 106966580 106967082 MACS_peak_7997 113.60 -chr11 106888391 106889001 MACS_peak_1583 69.90 -chr19 11848049 11848520 MACS_peak_4306 51.63 -chr15 8584865 8585230 MACS_peak_2922 62.73 -chr17 87913100 87913467 MACS_peak_3983 114.07 -chr13 34254496 34254848 MACS_peak_2122 67.47 -chr1 59914119 59914399 MACS_peak_135 57.79 -chr4 140629745 140629986 MACS_peak_6102 81.30 -chr2 180446822 180447260 MACS_peak_5086 99.29 -chr2 29804429 29804860 MACS_peak_4600 65.92 -chr12 32992278 32992842 MACS_peak_1783 84.01 -chr14 99698259 99698564 MACS_peak_2803 84.57 -chr19 3832712 3833378 MACS_peak_4224 118.71 -chr15 100536597 100537082 MACS_peak_3300 154.87 -chr7 109390646 109391459 MACS_peak_7527 161.60 -chr7 151692825 151693219 MACS_peak_7719 66.56 -chr14 52639405 52639860 MACS_peak_2557 52.74 -chr1 158257693 158258023 MACS_peak_461 64.88 -chr12 76836098 76836626 MACS_peak_1878 62.73 -chr1 182998458 182998880 MACS_peak_570 52.74 -chr2 51797359 51797797 MACS_peak_4703 65.46 -chr8 96707068 96707513 MACS_peak_7960 104.94 -chr3 28143185 28143670 MACS_peak_5131 101.35 -chr6 88889418 88889830 MACS_peak_7010 52.74 -chr2 131937255 131937594 MACS_peak_4912 72.89 -chr7 25688982 25689460 MACS_peak_7246 62.73 -chr19 46938054 46938331 MACS_peak_4436 92.02 -chr7 138515654 138516191 MACS_peak_7671 84.15 -chr14 29767339 29767710 MACS_peak_2466 51.44 -chr15 86002731 86003183 MACS_peak_3240 72.40 -chr15 103088442 103089223 MACS_peak_3322 883.55 -chr19 33127653 33128234 MACS_peak_4366 116.11 -chr5 135450040 135450529 MACS_peak_6650 101.01 -chr15 51080445 51080929 MACS_peak_3050 62.73 -chr9 124009677 124010094 MACS_peak_8582 65.09 -chr1 107856029 107856432 MACS_peak_313 52.07 -chr10 107555226 107555677 MACS_peak_929 79.40 -chr7 55762430 55762866 MACS_peak_7364 91.92 -chr12 96882121 96882495 MACS_peak_1959 70.03 -chr3 68480776 68481485 MACS_peak_5235 78.55 -chr1 89537056 89537406 MACS_peak_259 53.07 -chr14 27335329 27335792 MACS_peak_2450 52.74 -chr17 
56949680 56949993 MACS_peak_3889 81.91 -chr5 118928141 118928605 MACS_peak_6556 117.21 -chr8 84911554 84912100 MACS_peak_7907 71.83 -chr8 129108351 129108844 MACS_peak_8142 54.79 -chr3 78877870 78878229 MACS_peak_5251 71.22 -chr19 18650375 18650861 MACS_peak_4324 62.73 -chr6 87942729 87943305 MACS_peak_6992 81.55 -chr12 92821124 92821370 MACS_peak_1955 69.53 -chr11 18065187 18065398 MACS_peak_1077 97.88 -chr17 84515588 84516165 MACS_peak_3966 458.83 -chr9 92169110 92169873 MACS_peak_8447 108.56 -chr14 14920422 14920757 MACS_peak_2398 123.05 -chr9 34798448 34798810 MACS_peak_8223 70.98 -chr3 94306130 94306466 MACS_peak_5319 95.95 -chr5 115790919 115791717 MACS_peak_6543 254.81 -chr11 68780920 68781624 MACS_peak_1249 96.66 -chr1 55084208 55084643 MACS_peak_101 56.99 -chr11 115938781 115939242 MACS_peak_1655 106.79 -chr7 134851363 134852112 MACS_peak_7651 388.87 -chr2 25413082 25413751 MACS_peak_4557 108.33 -chr9 70760521 70761198 MACS_peak_8400 125.82 -chr1 132526233 132526605 MACS_peak_367 51.37 -chr12 77462231 77462609 MACS_peak_1880 61.61 -chr2 131322118 131322495 MACS_peak_4905 173.69 -chr12 8886534 8886943 MACS_peak_1732 62.46 -chr1 134921392 134922134 MACS_peak_388 97.78 -chr12 50546587 50546853 MACS_peak_1811 72.51 -chr16 44347497 44348102 MACS_peak_3445 67.73 -chr16 91448123 91448772 MACS_peak_3510 110.35 -chr8 96932624 96932949 MACS_peak_7968 67.46 -chr9 50409776 50410148 MACS_peak_8274 68.39 -chr15 39018860 39019403 MACS_peak_3023 96.24 -chrX 7548382 7548918 MACS_peak_8587 182.65 -chr1 36568547 36568801 MACS_peak_47 57.79 -chr3 133241295 133241605 MACS_peak_5543 56.48 -chr3 36470919 36471238 MACS_peak_5148 54.12 -chr5 137974253 137974619 MACS_peak_6683 59.42 -chr4 107278613 107279232 MACS_peak_5866 117.82 -chr8 3621220 3621676 MACS_peak_7722 76.85 -chr11 68792865 68793384 MACS_peak_1250 61.41 -chr11 107283838 107284259 MACS_peak_1593 62.31 -chr17 36162344 36162790 MACS_peak_3801 77.75 -chr2 119176647 119177021 MACS_peak_4841 59.32 -chr14 75947689 75947989 
MACS_peak_2746 115.64 -chr2 32837666 32838081 MACS_peak_4650 56.37 -chr5 21772275 21772751 MACS_peak_6260 88.64 -chr4 88181586 88181956 MACS_peak_5819 83.97 -chr17 46210576 46211375 MACS_peak_3824 152.68 -chr8 113290700 113290975 MACS_peak_8033 68.02 -chr14 100246709 100247166 MACS_peak_2804 114.56 -chr18 21097256 21097529 MACS_peak_4028 188.09 -chr15 58175270 58175626 MACS_peak_3078 52.59 -chr9 61513942 61514355 MACS_peak_8334 216.69 -chr10 92184761 92185425 MACS_peak_881 113.68 -chr2 125450541 125451011 MACS_peak_4863 84.01 -chr7 120579702 120580147 MACS_peak_7571 84.01 -chr17 28313728 28314505 MACS_peak_3710 147.88 -chr17 85092137 85092578 MACS_peak_3972 60.05 -chr7 52391580 52392059 MACS_peak_7336 71.31 -chr4 106607491 106607860 MACS_peak_5861 62.73 -chr15 76531134 76532498 MACS_peak_3158 205.05 -chr12 86815403 86815709 MACS_peak_1937 55.62 -chr8 97381250 97381634 MACS_peak_7975 67.56 -chr2 18892130 18892531 MACS_peak_4517 53.98 -chr13 93362690 93363352 MACS_peak_2290 156.01 -chr4 134276344 134276744 MACS_peak_6023 66.29 -chr5 136189308 136189833 MACS_peak_6660 92.87 -chr13 54712548 54712992 MACS_peak_2192 70.78 -chr3 95116459 95117202 MACS_peak_5338 276.81 -chr15 55668280 55668565 MACS_peak_3068 57.79 -chr7 86508145 86508581 MACS_peak_7430 65.59 -chr13 64134767 64135424 MACS_peak_2229 84.01 -chr14 75405717 75405947 MACS_peak_2740 56.48 -chr2 34655577 34655906 MACS_peak_4662 86.22 -chr2 178420601 178420979 MACS_peak_5071 60.99 -chr7 80675775 80676079 MACS_peak_7406 57.04 -chr6 120314001 120314656 MACS_peak_7092 155.03 -chr11 103889450 103889863 MACS_peak_1547 105.51 -chr1 75209595 75210147 MACS_peak_201 195.59 -chr4 136209837 136210242 MACS_peak_6063 91.62 -chr19 38298472 38299109 MACS_peak_4384 52.09 -chr3 146318049 146318677 MACS_peak_5622 65.43 -chr8 97525645 97526124 MACS_peak_7981 83.75 -chr6 42299260 42299977 MACS_peak_6864 156.01 -chr13 95746101 95746664 MACS_peak_2305 118.67 -chr5 68262648 68262928 MACS_peak_6374 76.86 -chr9 4309901 4310202 
MACS_peak_8156 57.32 -chr2 130455636 130455898 MACS_peak_4896 68.02 -chr7 133920084 133920580 MACS_peak_7627 94.25 -chr3 144712794 144713309 MACS_peak_5603 333.24 -chr4 41492809 41493178 MACS_peak_5745 61.67 -chr6 83725731 83726256 MACS_peak_6965 72.37 -chr14 123928421 123928771 MACS_peak_2892 53.07 -chr11 94409579 94409974 MACS_peak_1489 68.44 -chr2 165618765 165619347 MACS_peak_5039 77.35 -chr1 97210080 97210414 MACS_peak_302 73.84 -chr19 31412009 31412328 MACS_peak_4353 67.09 -chr7 146028031 146028398 MACS_peak_7696 57.49 -chr14 98617003 98617302 MACS_peak_2799 57.51 -chr19 44406048 44406439 MACS_peak_4413 66.95 -chr14 26681413 26681976 MACS_peak_2449 117.79 -chr2 128037989 128038430 MACS_peak_4878 52.74 -chr17 61434287 61434641 MACS_peak_3905 62.86 -chr15 36390225 36390517 MACS_peak_2989 66.98 -chr14 27398759 27399655 MACS_peak_2452 361.52 -chr11 116115836 116116290 MACS_peak_1661 77.03 -chr15 36579667 36580306 MACS_peak_2996 51.96 -chr1 57568835 57569128 MACS_peak_112 60.79 -chr15 67474872 67475357 MACS_peak_3123 158.15 -chr10 19428365 19428826 MACS_peak_632 89.84 -chr14 113392921 113393120 MACS_peak_2836 66.98 -chr15 38448807 38449350 MACS_peak_3019 59.81 -chr14 20991935 20992435 MACS_peak_2406 75.91 -chr6 134006321 134006678 MACS_peak_7142 71.38 -chr12 112127235 112127724 MACS_peak_2013 80.19 -chr14 76244671 76245541 MACS_peak_2752 107.12 -chr11 104164505 104164874 MACS_peak_1549 69.13 -chr7 134536698 134537132 MACS_peak_7646 78.65 -chr1 137867871 137868260 MACS_peak_415 143.30 -chr18 34665859 34666370 MACS_peak_4058 61.96 -chr1 129101475 129101945 MACS_peak_348 77.88 -chr11 72295448 72295925 MACS_peak_1293 156.01 -chr17 24591995 24592521 MACS_peak_3651 114.96 -chr15 3945339 3946408 MACS_peak_2896 271.22 -chr8 122250900 122251332 MACS_peak_8064 71.78 -chr11 115938158 115938571 MACS_peak_1654 58.78 -chr9 114597610 114598135 MACS_peak_8535 92.87 -chr6 43207256 43207620 MACS_peak_6869 70.82 -chr3 152935129 152935658 MACS_peak_5650 60.73 -chr3 94655429 94656010 
MACS_peak_5324 210.22 -chr9 57368841 57369352 MACS_peak_8313 53.61 -chr4 3157974 3158349 MACS_peak_5679 52.74 -chr11 107211666 107212176 MACS_peak_1586 89.67 -chr15 42269449 42270170 MACS_peak_3035 131.90 -chr9 70682529 70683032 MACS_peak_8396 186.94 -chr8 27125446 27126059 MACS_peak_7778 102.67 -chr9 20896025 20896479 MACS_peak_8195 67.10 -chr15 75551370 75551790 MACS_peak_3136 66.67 -chr15 55028995 55029425 MACS_peak_3064 90.94 -chr16 18308240 18308586 MACS_peak_3350 58.96 -chr3 93353745 93354375 MACS_peak_5318 103.23 -chr16 23107242 23107924 MACS_peak_3367 113.70 -chr18 36486603 36487009 MACS_peak_4080 53.46 -chr18 5390330 5390807 MACS_peak_4001 113.10 -chr17 56428661 56429186 MACS_peak_3882 118.67 -chr2 18860310 18861083 MACS_peak_4512 84.24 -chr7 97888242 97888576 MACS_peak_7477 57.36 -chr3 21810071 21810487 MACS_peak_5121 118.67 -chr17 78181904 78182525 MACS_peak_3946 77.05 -chr14 56197450 56198063 MACS_peak_2598 129.98 -chr9 99140804 99141128 MACS_peak_8467 58.22 -chr10 92623323 92623821 MACS_peak_885 100.15 -chr4 140616351 140617131 MACS_peak_6099 80.20 -chr10 61142776 61143539 MACS_peak_744 80.20 -chr7 104485058 104485742 MACS_peak_7488 317.41 -chr11 115939476 115940007 MACS_peak_1656 92.32 -chr10 94580987 94581311 MACS_peak_903 56.69 -chr15 76157364 76157952 MACS_peak_3152 125.64 -chr13 14155415 14155855 MACS_peak_2065 52.91 -chr15 67066485 67066934 MACS_peak_3117 84.01 -chr7 29227640 29228147 MACS_peak_7277 73.17 -chr13 6514405 6514820 MACS_peak_2047 104.32 -chr4 140542557 140543005 MACS_peak_6097 144.88 -chr5 111937855 111938599 MACS_peak_6514 128.49 -chr16 44018427 44018767 MACS_peak_3442 64.02 -chr1 133421664 133422047 MACS_peak_377 82.81 -chrX 166419443 166419942 MACS_peak_8678 54.41 -chr15 93105701 93105937 MACS_peak_3251 154.40 -chr1 108780375 108780748 MACS_peak_320 57.56 -chr11 84636850 84637366 MACS_peak_1410 80.20 -chr17 24995915 24996584 MACS_peak_3656 135.78 -chr14 58033892 58034211 MACS_peak_2613 58.66 -chr13 29847874 29848368 MACS_peak_2108 
158.86 -chr1 13520675 13521060 MACS_peak_11 108.01 -chr2 156137538 156137972 MACS_peak_4990 78.65 -chr8 87550632 87550994 MACS_peak_7941 66.66 -chr3 151768385 151768678 MACS_peak_5634 56.48 -chr3 108012888 108013451 MACS_peak_5443 78.80 -chr13 44597050 44597814 MACS_peak_2154 202.82 -chr2 31917741 31918033 MACS_peak_4624 91.45 -chr3 132521750 132522383 MACS_peak_5537 143.48 -chr12 4879663 4880069 MACS_peak_1724 78.35 -chr6 91628640 91629356 MACS_peak_7022 67.31 -chr3 81433756 81434158 MACS_peak_5257 67.93 -chr7 54138715 54139193 MACS_peak_7359 128.73 -chr5 137102584 137103013 MACS_peak_6672 81.24 -chr8 59967224 59967628 MACS_peak_7830 62.73 -chr14 73689765 73690147 MACS_peak_2729 76.26 -chr11 117671467 117671893 MACS_peak_1678 66.26 -chr1 133214967 133215419 MACS_peak_376 174.60 -chr15 72853276 72853636 MACS_peak_3127 52.74 -chr11 109334214 109334929 MACS_peak_1611 51.65 -chrX 45266253 45266899 MACS_peak_8617 128.35 -chr2 131877465 131877919 MACS_peak_4910 132.70 -chr9 20779965 20780304 MACS_peak_8192 60.28 -chr3 90068955 90069393 MACS_peak_5310 78.47 -chr5 76187734 76188295 MACS_peak_6404 65.35 -chr11 104180396 104181197 MACS_peak_1550 84.55 -chr9 43839155 43839734 MACS_peak_8243 170.66 -chr15 85812555 85813334 MACS_peak_3239 124.98 -chr16 30691946 30692407 MACS_peak_3394 226.47 -chr2 110401236 110401587 MACS_peak_4817 63.11 -chr5 125914300 125914711 MACS_peak_6623 65.51 -chr2 166483417 166483707 MACS_peak_5047 52.10 -chr8 60131046 60131454 MACS_peak_7833 52.74 -chr1 153024254 153024901 MACS_peak_439 66.68 -chr6 135133407 135133856 MACS_peak_7151 66.57 -chr7 82993032 82993383 MACS_peak_7423 52.99 -chr12 36728733 36729377 MACS_peak_1795 106.79 -chr19 54161870 54162283 MACS_peak_4440 93.96 -chr13 21366775 21367132 MACS_peak_2077 58.81 -chr7 140828409 140828831 MACS_peak_7684 68.81 -chr7 52771782 52772179 MACS_peak_7344 57.63 -chr11 57258571 57259095 MACS_peak_1191 98.14 -chr10 19855329 19855821 MACS_peak_636 125.55 -chr9 48594723 48595281 MACS_peak_8268 79.19 -chr4 
41278073 41278681 MACS_peak_5744 81.51 -chr18 44988493 44988911 MACS_peak_4095 69.91 -chr1 74438395 74439179 MACS_peak_195 162.82 -chr3 108830511 108830918 MACS_peak_5453 62.62 -chr13 96427044 96427529 MACS_peak_2310 152.26 -chr1 142384049 142384472 MACS_peak_423 79.51 -chr1 179064649 179064883 MACS_peak_543 74.87 -chr3 105490131 105490555 MACS_peak_5418 63.69 -chr2 90508129 90508418 MACS_peak_4780 76.86 -chr15 81846602 81847127 MACS_peak_3217 117.96 -chr18 3270592 3271094 MACS_peak_3989 195.43 -chr1 108606863 108607335 MACS_peak_318 95.47 -chr13 75935312 75935640 MACS_peak_2250 63.30 -chr16 30789953 30790403 MACS_peak_3396 148.02 -chr10 111409491 111409958 MACS_peak_950 131.28 -chr9 40880928 40881362 MACS_peak_8236 65.72 -chr8 123191898 123192493 MACS_peak_8089 118.67 -chr12 86713029 86713482 MACS_peak_1934 95.23 -chr18 65281748 65282564 MACS_peak_4150 161.32 -chr9 37296593 37297143 MACS_peak_8230 129.00 -chr18 75530251 75530647 MACS_peak_4189 68.37 -chr14 64162422 64162897 MACS_peak_2650 62.73 -chr10 82222485 82222777 MACS_peak_854 129.73 -chr10 51248911 51249493 MACS_peak_714 104.02 -chr19 45612299 45612910 MACS_peak_4419 89.38 -chr16 59515986 59516330 MACS_peak_3480 72.46 -chr1 37364506 37364915 MACS_peak_55 77.39 -chr9 107436160 107436580 MACS_peak_8495 73.91 -chr6 123239085 123239498 MACS_peak_7098 80.30 -chr8 24145434 24145873 MACS_peak_7767 65.39 -chr17 59064066 59064738 MACS_peak_3903 193.40 -chr18 81626532 81626968 MACS_peak_4206 53.17 -chr8 72498191 72498470 MACS_peak_7850 76.86 -chr2 127033717 127034197 MACS_peak_4869 122.43 -chr3 153427354 153428352 MACS_peak_5658 173.73 -chr13 95777240 95777685 MACS_peak_2306 62.73 -chr6 90654616 90655084 MACS_peak_7016 74.57 -chr6 115545743 115546136 MACS_peak_7072 52.74 -chr7 52392685 52393201 MACS_peak_7337 105.13 -chr1 174445177 174445620 MACS_peak_534 65.13 -chr5 139853354 139853932 MACS_peak_6702 77.65 -chr17 44266175 44266552 MACS_peak_3809 97.39 -chr9 78919711 78920097 MACS_peak_8424 50.34 -chr2 120210305 
120210628 MACS_peak_4846 69.96 -chr8 97679869 97680475 MACS_peak_7985 130.84 -chr14 70029196 70029514 MACS_peak_2696 58.75 -chr11 97574402 97574747 MACS_peak_1511 53.47 -chr2 56968627 56969614 MACS_peak_4711 285.57 -chr7 26472954 26473335 MACS_peak_7258 62.73 -chr1 146985918 146986334 MACS_peak_432 62.66 -chr6 30276109 30276518 MACS_peak_6816 73.17 -chr18 4969715 4970163 MACS_peak_3999 62.73 -chr6 85298851 85299333 MACS_peak_6971 130.45 -chr18 62318702 62319054 MACS_peak_4130 55.87 -chr7 97493416 97493783 MACS_peak_7473 95.23 -chr5 84728325 84728797 MACS_peak_6420 65.95 -chr15 96290510 96290960 MACS_peak_3260 75.41 -chr5 64493902 64494502 MACS_peak_6348 155.47 -chr12 70683782 70684144 MACS_peak_1854 74.38 -chr7 28259485 28260176 MACS_peak_7269 157.45 -chr3 102072769 102073100 MACS_peak_5391 64.80 -chr3 121177634 121178278 MACS_peak_5487 124.60 -chr3 141995570 141995959 MACS_peak_5587 74.70 -chr10 12681163 12681522 MACS_peak_617 57.44 -chr7 35770301 35770804 MACS_peak_7310 130.48 -chr3 107901318 107901701 MACS_peak_5442 68.87 -chr4 155406985 155407313 MACS_peak_6229 63.11 -chr14 46277533 46277983 MACS_peak_2523 63.09 -chr7 142790268 142790503 MACS_peak_7693 89.37 -chr9 66360249 66360570 MACS_peak_8377 57.12 -chr15 95621015 95621459 MACS_peak_3254 77.44 -chr4 71861086 71862075 MACS_peak_5807 206.03 -chr11 121065722 121066055 MACS_peak_1707 99.74 -chr19 9041528 9042024 MACS_peak_4289 107.21 -chr8 98477882 98478259 MACS_peak_7992 69.80 -chr18 75722207 75722491 MACS_peak_4193 91.45 -chr15 57812241 57812965 MACS_peak_3074 52.05 -chr3 58917608 58918452 MACS_peak_5209 152.48 -chr4 41660450 41660822 MACS_peak_5747 57.64 -chr11 11641587 11641928 MACS_peak_1066 55.63 -chr8 50911172 50911534 MACS_peak_7818 70.98 -chr11 120209562 120209886 MACS_peak_1697 66.59 -chr14 66971802 66972207 MACS_peak_2681 62.73 -chr3 98621426 98621709 MACS_peak_5379 72.51 -chr12 49775350 49775758 MACS_peak_1809 67.51 -chr12 17040311 17040756 MACS_peak_1752 86.89 -chr14 70465516 70466231 
MACS_peak_2709 158.40 -chr4 106926454 106926892 MACS_peak_5863 65.46 -chr11 5221117 5221579 MACS_peak_1043 129.84 -chr11 51762768 51763314 MACS_peak_1170 137.47 -chr12 73948553 73949012 MACS_peak_1872 142.64 -chr15 12123626 12124218 MACS_peak_2933 94.43 -chr15 12246914 12247416 MACS_peak_2937 294.48 -chr2 7924537 7924842 MACS_peak_4478 63.31 -chr16 56916814 56917191 MACS_peak_3470 51.00 -chr14 57190198 57191173 MACS_peak_2608 120.19 -chr5 138011402 138012367 MACS_peak_6684 707.79 -chr1 36153980 36154800 MACS_peak_40 119.59 -chr9 105397273 105397630 MACS_peak_8483 90.84 -chr4 148542288 148542494 MACS_peak_6147 99.30 -chr7 134234472 134235313 MACS_peak_7633 215.08 -chr1 187186557 187186854 MACS_peak_584 84.01 -chr2 156703464 156703925 MACS_peak_5000 135.84 -chr2 45507624 45507896 MACS_peak_4694 66.03 -chr2 25110687 25111472 MACS_peak_4543 265.66 -chr13 23494534 23495087 MACS_peak_2082 74.85 -chr2 118738734 118739174 MACS_peak_4833 58.77 -chrX 11733021 11733752 MACS_peak_8601 84.09 -chr3 153560124 153560559 MACS_peak_5664 53.23 -chr8 97479035 97479520 MACS_peak_7976 156.01 -chr9 114662010 114662635 MACS_peak_8538 65.64 -chr18 56618529 56618905 MACS_peak_4110 56.89 -chr17 34057391 34058028 MACS_peak_3762 56.35 -chr1 99519858 99520254 MACS_peak_306 57.70 -chr4 136194817 136195184 MACS_peak_6060 98.40 -chr7 16611238 16611688 MACS_peak_7204 52.74 -chr1 60215214 60215684 MACS_peak_140 88.89 -chr6 149257575 149258040 MACS_peak_7180 65.32 -chr4 8159311 8159627 MACS_peak_5687 55.94 -chr14 45660604 45661144 MACS_peak_2518 98.84 -chr11 84024342 84024705 MACS_peak_1402 50.69 -chr11 108110784 108111439 MACS_peak_1606 82.28 -chr7 87590346 87590812 MACS_peak_7448 54.95 -chr9 35018443 35018749 MACS_peak_8226 74.08 -chr7 61764305 61764697 MACS_peak_7375 62.07 -chr3 137620670 137621228 MACS_peak_5569 110.82 -chr8 89147603 89148183 MACS_peak_7945 106.99 -chr10 80982282 80982979 MACS_peak_848 148.39 -chr2 113012940 113013326 MACS_peak_4821 56.59 -chr16 93767743 93768080 MACS_peak_3559 
109.86 -chr2 4483390 4484698 MACS_peak_4459 128.33 -chr6 128792917 128793800 MACS_peak_7130 123.88 -chr5 148241425 148242026 MACS_peak_6759 97.14 -chr4 34829946 34830380 MACS_peak_5730 65.72 -chr3 37558222 37559100 MACS_peak_5161 173.11 -chr2 90894346 90894793 MACS_peak_4781 72.76 -chr8 107486121 107486440 MACS_peak_7999 81.28 -chr7 140064742 140065140 MACS_peak_7681 73.17 -chr12 30367083 30367515 MACS_peak_1770 73.89 -chrX 11711607 11711970 MACS_peak_8600 84.60 -chr15 5058192 5058833 MACS_peak_2901 89.38 -chr7 104727397 104728070 MACS_peak_7489 70.84 -chr6 133055524 133055892 MACS_peak_7138 62.47 -chr3 95558657 95559026 MACS_peak_5348 79.54 -chr17 35326947 35327306 MACS_peak_3781 66.90 -chr14 52816486 52817050 MACS_peak_2560 64.66 -chr1 87632880 87633301 MACS_peak_233 58.17 -chr9 57495286 57495888 MACS_peak_8318 128.82 -chr11 87571803 87572391 MACS_peak_1454 59.68 -chr4 101511482 101511839 MACS_peak_5847 71.38 -chr15 12251825 12252367 MACS_peak_2938 54.29 -chr8 24276703 24277308 MACS_peak_7770 91.11 -chr6 117981548 117981940 MACS_peak_7086 68.66 -chr7 118300107 118300551 MACS_peak_7564 60.79 -chr5 77553172 77553619 MACS_peak_6415 77.65 -chr7 133428410 133429279 MACS_peak_7615 176.96 -chr5 54386367 54386928 MACS_peak_6343 135.99 -chr2 157967843 157968322 MACS_peak_5015 52.74 -chr1 13579885 13580466 MACS_peak_13 106.79 -chr17 47825794 47826338 MACS_peak_3846 103.13 -chr15 96115848 96116105 MACS_peak_3259 89.28 -chr6 8018474 8018781 MACS_peak_6782 61.94 -chr1 58769938 58770557 MACS_peak_122 106.56 -chr18 13100063 13100479 MACS_peak_4020 101.54 -chr1 95462306 95462739 MACS_peak_289 61.51 -chr13 8456217 8456514 MACS_peak_2052 121.95 -chr8 87426937 87427392 MACS_peak_7932 147.42 -chr3 69488182 69488508 MACS_peak_5241 55.06 -chr5 108495385 108495819 MACS_peak_6502 53.36 -chr7 26391500 26391968 MACS_peak_7253 62.73 -chr14 122222542 122222864 MACS_peak_2879 60.02 -chr7 16880847 16881143 MACS_peak_7213 72.15 -chr10 84379493 84379935 MACS_peak_862 65.19 -chr1 93218296 
93218729 MACS_peak_276 67.88 -chr7 134005243 134005813 MACS_peak_7631 156.01 -chr9 25059978 25060419 MACS_peak_8215 65.26 -chr2 4802272 4802882 MACS_peak_4463 90.71 -chr9 114640488 114640918 MACS_peak_8537 63.14 -chr1 155044510 155044840 MACS_peak_453 52.74 -chr2 181598797 181599317 MACS_peak_5098 52.05 -chr16 30227325 30227906 MACS_peak_3389 144.56 -chr2 33582864 33583285 MACS_peak_4654 61.53 -chr2 38920882 38921337 MACS_peak_4683 62.69 -chr12 8639627 8640095 MACS_peak_1730 89.27 -chr1 193244835 193245254 MACS_peak_592 63.53 -chr19 28042093 28042496 MACS_peak_4342 185.71 -chr18 67399653 67399896 MACS_peak_4155 104.20 -chr15 81702453 81702935 MACS_peak_3214 70.29 -chr2 4354267 4354521 MACS_peak_4454 66.03 -chr17 71599086 71599527 MACS_peak_3929 65.92 -chr11 115016216 115016976 MACS_peak_1629 260.88 -chr13 49402730 49403235 MACS_peak_2170 96.40 -chr1 173607566 173608216 MACS_peak_521 161.65 -chr4 149943597 149944833 MACS_peak_6170 714.32 -chr2 30033180 30033596 MACS_peak_4605 60.37 -chr12 73775435 73775826 MACS_peak_1868 193.90 -chr19 6686904 6687279 MACS_peak_4266 106.79 -chr13 94372068 94372377 MACS_peak_2298 52.57 -chr3 134875358 134875676 MACS_peak_5549 74.76 -chr14 35123318 35123817 MACS_peak_2496 91.32 -chr4 134064080 134064462 MACS_peak_6020 65.66 -chr7 38451614 38451982 MACS_peak_7318 51.67 -chr2 59721277 59721585 MACS_peak_4718 52.74 -chr4 148521433 148521717 MACS_peak_6146 50.43 -chr6 29651055 29651524 MACS_peak_6813 99.40 -chr2 25283862 25284454 MACS_peak_4553 118.30 -chr1 180335685 180336069 MACS_peak_548 70.43 -chr15 9000420 9001374 MACS_peak_2924 50.16 -chr17 76783407 76783741 MACS_peak_3945 73.33 -chr10 79377042 79377510 MACS_peak_798 105.25 -chr4 137813129 137813637 MACS_peak_6069 79.35 -chr19 23347935 23348633 MACS_peak_4335 168.98 -chr2 77014459 77014721 MACS_peak_4764 57.79 -chr17 27725137 27725648 MACS_peak_3703 81.61 -chr3 84271282 84271895 MACS_peak_5266 107.11 -chr4 149036130 149036714 MACS_peak_6156 127.11 -chr17 36157226 36157966 
MACS_peak_3800 58.06 -chr9 113925463 113925818 MACS_peak_8532 139.16 -chr18 62455027 62455412 MACS_peak_4131 69.19 -chr2 143717397 143717821 MACS_peak_4930 57.72 -chr14 70058939 70059262 MACS_peak_2698 78.56 -chr9 8004492 8005091 MACS_peak_8167 146.39 -chr2 22750741 22751369 MACS_peak_4526 107.68 -chr11 113663893 113664272 MACS_peak_1623 118.67 -chr11 60643876 60644376 MACS_peak_1224 75.91 -chr13 55463887 55464601 MACS_peak_2197 89.38 -chr3 138158153 138158835 MACS_peak_5576 101.13 -chr9 61779725 61780181 MACS_peak_8335 52.74 -chr5 141092685 141093127 MACS_peak_6724 60.64 -chr4 151560621 151560894 MACS_peak_6192 80.71 -chr12 71087816 71088552 MACS_peak_1856 105.21 -chr3 136623971 136624307 MACS_peak_5565 54.21 -chr18 64675715 64676128 MACS_peak_4137 67.15 -chr5 93521864 93522250 MACS_peak_6451 50.34 -chr14 27666233 27666572 MACS_peak_2457 95.10 -chr17 65649466 65649790 MACS_peak_3914 74.21 -chr3 96961630 96962284 MACS_peak_5365 62.03 -chr19 46681813 46682242 MACS_peak_4433 64.27 -chr5 33677654 33678040 MACS_peak_6302 133.38 -chr1 155123197 155123551 MACS_peak_454 52.75 -chr11 104222718 104223628 MACS_peak_1551 135.09 -chr12 40834638 40835084 MACS_peak_1801 104.85 -chr5 140797328 140797751 MACS_peak_6714 136.27 -chr8 124636207 124636603 MACS_peak_8095 55.87 -chr1 33776550 33777146 MACS_peak_29 54.49 -chr2 127277423 127277785 MACS_peak_4871 62.22 -chr16 11144052 11144357 MACS_peak_3337 75.98 -chr2 71759141 71759569 MACS_peak_4740 61.84 -chr5 144654264 144654609 MACS_peak_6750 202.40 -chr6 136416896 136417766 MACS_peak_7155 107.68 -chr19 61160284 61160710 MACS_peak_4445 79.27 -chr5 135513632 135514247 MACS_peak_6652 52.16 -chr10 69559457 69559926 MACS_peak_764 75.43 -chr19 34625289 34625732 MACS_peak_4369 58.58 -chr3 129778582 129778971 MACS_peak_5530 52.74 -chr3 40549079 40549989 MACS_peak_5170 139.15 -chr12 63655639 63655947 MACS_peak_1841 75.70 -chr12 88027775 88028206 MACS_peak_1939 57.42 -chr4 149930560 149930906 MACS_peak_6169 51.58 -chr7 26175003 26175297 
MACS_peak_7251 193.27 -chr3 137631502 137632466 MACS_peak_5570 270.50 -chr7 75095358 75096309 MACS_peak_7396 276.27 -chr13 112597147 112598260 MACS_peak_2361 120.74 -chr8 73397210 73397892 MACS_peak_7870 88.07 -chr10 57870391 57870790 MACS_peak_723 61.56 -chr12 21379875 21380271 MACS_peak_1759 71.11 -chr4 149229209 149229627 MACS_peak_6162 68.37 -chr11 79454167 79454630 MACS_peak_1371 103.29 -chr2 118577801 118578303 MACS_peak_4831 83.83 -chr12 90031052 90031356 MACS_peak_1953 67.25 -chr3 89221936 89222334 MACS_peak_5302 55.52 -chr11 49015967 49017374 MACS_peak_1149 140.92 -chr5 101854756 101855187 MACS_peak_6476 57.25 -chr14 55045118 55046069 MACS_peak_2572 278.29 -chr8 122360636 122360974 MACS_peak_8068 72.98 -chr6 29559590 29559996 MACS_peak_6810 66.51 -chr8 37675573 37676057 MACS_peak_7806 52.74 -chr7 135604640 135605584 MACS_peak_7660 140.03 -chr7 75215546 75215889 MACS_peak_7400 73.17 -chr11 6387328 6387780 MACS_peak_1054 66.34 -chr6 97171581 97172040 MACS_peak_7033 76.80 -chr2 71652110 71652536 MACS_peak_4738 52.74 -chr14 70205001 70205574 MACS_peak_2702 90.17 -chr7 4636478 4636845 MACS_peak_7189 84.01 -chr1 163697037 163697580 MACS_peak_480 152.14 -chr14 69905127 69905516 MACS_peak_2694 60.69 -chr4 105905243 105905631 MACS_peak_5853 53.13 -chr19 43763805 43764296 MACS_peak_4405 84.01 -chr15 98863988 98864259 MACS_peak_3288 109.33 -chr8 28268378 28268863 MACS_peak_7783 143.30 -chr5 50210130 50210512 MACS_peak_6329 203.50 -chr1 49424163 49424526 MACS_peak_75 70.90 -chr11 114416815 114417130 MACS_peak_1628 75.04 -chr2 29967973 29968359 MACS_peak_4603 84.01 -chr11 87275081 87275514 MACS_peak_1443 92.18 -chr9 72510503 72510911 MACS_peak_8403 63.21 -chr18 32996570 32997045 MACS_peak_4049 106.79 -chr7 108812030 108812396 MACS_peak_7522 58.27 -chr11 61377499 61378145 MACS_peak_1228 59.79 -chr5 141051472 141051938 MACS_peak_6718 69.03 -chr13 36416595 36416984 MACS_peak_2127 55.95 -chr9 14446069 14446592 MACS_peak_8182 98.84 -chr10 117850777 117851031 MACS_peak_971 
57.79 -chr8 126502767 126503316 MACS_peak_8125 69.46 -chr6 66891898 66892295 MACS_peak_6917 59.59 -chr4 122959709 122960251 MACS_peak_5935 121.49 -chr12 60308039 60308451 MACS_peak_1839 80.38 -chr5 137108320 137108562 MACS_peak_6673 91.45 -chr4 129373773 129374378 MACS_peak_5972 131.90 -chr2 45268392 45268789 MACS_peak_4692 56.57 -chr5 141120758 141121124 MACS_peak_6726 71.31 -chr16 30453372 30453956 MACS_peak_3392 106.68 -chrX 71542249 71542578 MACS_peak_8628 64.97 -chr12 72743380 72743811 MACS_peak_1864 61.64 -chrX 108755267 108755697 MACS_peak_8648 65.99 -chr9 45983547 45983831 MACS_peak_8258 57.79 -chr14 63049340 63049683 MACS_peak_2644 61.79 -chr7 105719591 105719912 MACS_peak_7498 88.72 -chr7 65987933 65988294 MACS_peak_7377 74.48 -chr7 26496882 26497392 MACS_peak_7260 73.32 -chr3 157588086 157588412 MACS_peak_5676 86.48 -chr5 66089157 66089851 MACS_peak_6367 87.07 -chr1 63823189 63823575 MACS_peak_148 56.59 -chr19 8872798 8873377 MACS_peak_4283 89.38 -chr2 179759459 179759977 MACS_peak_5073 51.39 -chr6 128611850 128612175 MACS_peak_7127 74.12 -chr6 125049277 125049748 MACS_peak_7109 130.85 -chr14 58645884 58646276 MACS_peak_2621 52.85 -chr7 20080932 20081328 MACS_peak_7239 66.58 -chr2 131917466 131917870 MACS_peak_4911 59.48 -chr5 3152015 3152483 MACS_peak_6238 160.90 -chr2 132512500 132512844 MACS_peak_4916 59.86 -chrX 99352299 99352715 MACS_peak_8645 66.94 -chr18 55059820 55060479 MACS_peak_4108 118.53 -chr3 40456923 40457382 MACS_peak_5169 255.48 -chr11 57331929 57332212 MACS_peak_1192 57.79 -chr9 65389306 65389659 MACS_peak_8364 62.73 -chr6 30252722 30253129 MACS_peak_6815 70.78 -chr9 74844269 74844678 MACS_peak_8409 69.94 -chr3 79787772 79788143 MACS_peak_5255 89.40 -chr5 97259867 97260133 MACS_peak_6460 68.02 -chr7 147392845 147393149 MACS_peak_7705 66.02 -chrX 71516418 71516883 MACS_peak_8627 52.74 -chr4 135841295 135841647 MACS_peak_6056 62.73 -chr17 34781424 34781705 MACS_peak_3772 57.79 -chr6 108654497 108654889 MACS_peak_7052 82.04 -chr1 88337836 
88338284 MACS_peak_244 100.57 -chr16 18876401 18877082 MACS_peak_3353 122.18 -chr15 86033062 86033641 MACS_peak_3242 60.29 -chr11 16851380 16851985 MACS_peak_1071 71.31 -chr7 125272857 125273444 MACS_peak_7585 163.78 -chr12 53738815 53739179 MACS_peak_1825 70.07 -chr2 156665349 156666095 MACS_peak_4998 100.62 -chr7 133942356 133942843 MACS_peak_7630 98.04 -chr9 90020990 90021384 MACS_peak_8443 52.74 -chr11 83658247 83658585 MACS_peak_1398 60.37 -chr14 52103248 52103624 MACS_peak_2554 55.37 -chr18 36446981 36447399 MACS_peak_4078 89.45 -chr14 22367170 22367534 MACS_peak_2429 72.40 -chr15 53498017 53498814 MACS_peak_3061 116.56 -chr11 87256810 87257164 MACS_peak_1441 111.94 -chr9 122859679 122860394 MACS_peak_8568 109.13 -chr1 23930853 23931253 MACS_peak_23 249.39 -chr12 70598412 70598748 MACS_peak_1852 89.26 -chr13 51943389 51943842 MACS_peak_2182 104.20 -chr19 29138427 29138708 MACS_peak_4343 57.79 -chr8 81885020 81885368 MACS_peak_7895 87.82 -chr11 106277303 106277812 MACS_peak_1574 61.29 -chr14 119583365 119583707 MACS_peak_2862 52.74 -chr6 32801035 32801303 MACS_peak_6834 51.52 -chr10 94394483 94395062 MACS_peak_900 88.16 -chr3 37565697 37565925 MACS_peak_5162 90.55 -chr3 145588349 145588654 MACS_peak_5611 80.31 -chr19 23061529 23061970 MACS_peak_4334 65.26 -chr17 26989228 26989502 MACS_peak_3680 76.86 -chr1 95970905 95971720 MACS_peak_298 160.55 -chr4 108520352 108520992 MACS_peak_5880 118.67 -chr3 26391575 26392055 MACS_peak_5126 88.33 -chr3 8919741 8920292 MACS_peak_5104 77.70 -chr1 29104970 29105334 MACS_peak_25 70.82 -chr16 58637925 58638495 MACS_peak_3475 107.78 -chr15 57966348 57966985 MACS_peak_3075 149.78 -chr13 115022343 115022626 MACS_peak_2368 64.17 -chr11 67905507 67905890 MACS_peak_1243 200.30 -chr17 29330165 29330593 MACS_peak_3730 142.62 -chr11 119161198 119161769 MACS_peak_1689 135.03 -chr4 140249323 140249820 MACS_peak_6089 56.52 -chr1 35926096 35926623 MACS_peak_38 55.35 -chr1 59412217 59412591 MACS_peak_129 51.22 -chr2 181414705 181415126 
MACS_peak_5096 57.92 -chr17 57418275 57418714 MACS_peak_3896 78.38 -chr8 87246451 87247128 MACS_peak_7927 93.62 -chr12 81913169 81913458 MACS_peak_1914 52.00 -chr9 88275002 88275236 MACS_peak_8441 69.53 -chr11 103078799 103079762 MACS_peak_1540 129.73 -chr7 148141747 148142194 MACS_peak_7708 52.74 -chr19 41338432 41338860 MACS_peak_4389 51.90 -chr16 91538765 91539078 MACS_peak_3515 52.29 -chr7 132761686 132762056 MACS_peak_7609 66.03 -chr5 138070239 138070549 MACS_peak_6688 62.84 -chr1 174294816 174295355 MACS_peak_528 124.08 -chr19 41912152 41912595 MACS_peak_4394 51.03 -chr3 96217894 96218423 MACS_peak_5355 67.16 -chr8 11393666 11393996 MACS_peak_7740 77.69 -chr15 37172600 37172975 MACS_peak_3007 91.99 -chr1 173611130 173611397 MACS_peak_522 76.86 -chr1 133022808 133023128 MACS_peak_372 72.77 -chr1 88454389 88454942 MACS_peak_252 84.11 -chr5 34856205 34856831 MACS_peak_6311 57.06 -chr7 71082000 71082779 MACS_peak_7382 80.20 -chr14 63736378 63736637 MACS_peak_2648 59.25 -chr19 32843677 32843920 MACS_peak_4364 63.61 -chr3 138702613 138702924 MACS_peak_5578 62.02 -chr17 86566107 86566474 MACS_peak_3976 84.24 -chr8 96910090 96910444 MACS_peak_7966 52.74 -chr13 112430419 112430951 MACS_peak_2359 81.28 -chr10 42013834 42014255 MACS_peak_700 66.60 -chr11 31517779 31518060 MACS_peak_1118 68.02 -chr18 5101351 5101714 MACS_peak_4000 52.05 -chr9 62724326 62725109 MACS_peak_8338 201.21 -chr9 99083674 99084236 MACS_peak_8465 132.54 -chr4 134827884 134829500 MACS_peak_6036 231.64 -chr17 13498739 13499070 MACS_peak_3624 73.59 -chr2 103006169 103006579 MACS_peak_4802 67.36 -chr15 6925244 6925735 MACS_peak_2914 54.92 -chr7 53078238 53078607 MACS_peak_7352 51.59 -chr2 90910384 90910774 MACS_peak_4782 68.81 -chr14 60870155 60870663 MACS_peak_2627 98.84 -chr2 118798450 118798941 MACS_peak_4834 74.57 -chr11 100870661 100871158 MACS_peak_1523 172.41 -chr11 87562630 87563498 MACS_peak_1450 403.76 -chr1 88154721 88155315 MACS_peak_239 86.42 -chr11 83112056 83112724 MACS_peak_1391 71.38 
-chr12 101425557 101426166 MACS_peak_1973 147.38 -chr6 85401368 85402272 MACS_peak_6974 228.57 -chr11 78966191 78966722 MACS_peak_1368 117.55 -chr3 129236434 129236906 MACS_peak_5523 68.23 -chr9 109777897 109778345 MACS_peak_8516 77.56 -chr3 88426615 88427427 MACS_peak_5292 205.76 -chr1 46004702 46005146 MACS_peak_69 84.01 -chr5 76126811 76127048 MACS_peak_6401 66.98 -chr10 59405079 59405490 MACS_peak_730 58.60 -chr1 9690569 9690998 MACS_peak_3 64.27 -chr11 88281205 88281634 MACS_peak_1478 51.83 -chr10 21199165 21199653 MACS_peak_652 194.61 -chr1 173433353 173434146 MACS_peak_518 161.22 -chr12 35731430 35731910 MACS_peak_1792 101.79 -chr15 38446130 38446559 MACS_peak_3017 57.57 -chr4 144679039 144679338 MACS_peak_6130 76.57 -chr10 92865497 92865758 MACS_peak_887 57.79 -chr14 121027316 121027735 MACS_peak_2865 55.30 -chr3 96530843 96531751 MACS_peak_5362 201.73 -chr16 91406386 91406908 MACS_peak_3506 89.38 -chr5 67336216 67336546 MACS_peak_6372 54.72 -chr3 89746156 89746412 MACS_peak_5305 60.79 -chr14 106991035 106991399 MACS_peak_2826 58.26 -chr1 36186077 36186421 MACS_peak_41 72.46 -chr14 66211596 66212092 MACS_peak_2670 58.08 -chr2 127911067 127911519 MACS_peak_4875 63.04 -chr8 73335210 73335509 MACS_peak_7867 62.14 -chr17 24291509 24291795 MACS_peak_3646 66.03 -chr16 92938013 92938490 MACS_peak_3549 62.73 -chr11 3279575 3280176 MACS_peak_1024 89.38 -chr6 32447109 32447460 MACS_peak_6833 67.55 -chr1 133724229 133724871 MACS_peak_379 70.44 -chr3 138152249 138152833 MACS_peak_5575 98.84 -chr1 38420121 38420414 MACS_peak_60 72.51 -chr14 55224814 55225200 MACS_peak_2579 50.34 -chr4 140624561 140624920 MACS_peak_6101 83.78 -chr2 106328336 106328622 MACS_peak_4811 57.79 -chr11 114335454 114335791 MACS_peak_1626 77.00 -chr1 133850783 133851479 MACS_peak_381 101.13 -chr15 101084784 101085109 MACS_peak_3307 66.49 -chr1 121422851 121423230 MACS_peak_328 58.94 -chr5 50093335 50093768 MACS_peak_6328 78.73 -chr17 44569507 44569864 MACS_peak_3811 119.82 -chr9 40965392 40966162 
MACS_peak_8238 135.71 -chr18 57409148 57409560 MACS_peak_4116 95.69 -chr11 106227571 106228161 MACS_peak_1573 68.64 -chr12 106264328 106264856 MACS_peak_1994 115.56 -chr11 51694649 51695006 MACS_peak_1167 50.61 -chr14 73304152 73304540 MACS_peak_2723 56.45 -chr13 38249483 38249806 MACS_peak_2137 53.73 -chr17 23939899 23940349 MACS_peak_3642 104.48 -chr8 13353101 13353525 MACS_peak_7749 169.06 -chr6 134203272 134203641 MACS_peak_7145 68.62 -chr13 3869743 3870092 MACS_peak_2043 57.73 -chr14 71173919 71174385 MACS_peak_2717 65.91 -chr15 8711544 8712011 MACS_peak_2923 52.74 -chr14 60883642 60884282 MACS_peak_2628 73.32 -chr6 100238263 100238692 MACS_peak_7041 79.31 -chr18 43246353 43246775 MACS_peak_4094 54.45 -chr3 32427840 32428279 MACS_peak_5142 65.39 -chr4 114176976 114177339 MACS_peak_5890 69.10 -chr15 24413374 24413825 MACS_peak_2953 72.47 -chr17 24388206 24388689 MACS_peak_3648 52.74 -chr2 31983332 31984086 MACS_peak_4626 138.76 -chr1 82784012 82784449 MACS_peak_226 85.75 -chr11 115527669 115527986 MACS_peak_1642 74.85 -chr4 133958319 133958921 MACS_peak_6017 111.16 -chr3 33698778 33699287 MACS_peak_5144 72.46 -chr14 122276884 122277969 MACS_peak_2882 101.55 -chr12 87310571 87310976 MACS_peak_1938 95.23 -chr13 58545231 58545547 MACS_peak_2208 75.30 -chr4 151382308 151383335 MACS_peak_6186 211.80 -chr4 107730838 107731131 MACS_peak_5875 76.86 -chr7 127973750 127974191 MACS_peak_7591 56.60 -chr13 51831495 51831924 MACS_peak_2177 53.62 -chr6 113256331 113257237 MACS_peak_7059 493.29 -chr18 75366936 75367315 MACS_peak_4186 69.64 -chr8 83893580 83893897 MACS_peak_7903 64.52 -chr6 82852344 82853038 MACS_peak_6953 269.15 -chr5 123271183 123271619 MACS_peak_6591 123.24 -chr14 47344721 47345104 MACS_peak_2524 52.77 -chr3 152575073 152575476 MACS_peak_5646 52.74 -chr3 145596911 145597202 MACS_peak_5612 83.77 -chr9 63985221 63985609 MACS_peak_8354 62.36 -chr1 58851809 58852216 MACS_peak_124 60.72 -chr4 119140346 119140886 MACS_peak_5928 82.34 -chr17 24486224 24486632 
MACS_peak_3649 78.65 -chr6 34614205 34614765 MACS_peak_6840 62.73 -chr17 50210060 50210574 MACS_peak_3861 61.75 -chr18 31945760 31946081 MACS_peak_4043 57.12 -chr17 23680231 23680767 MACS_peak_3638 53.90 -chr15 38129140 38129536 MACS_peak_3012 59.66 -chr8 113782043 113782424 MACS_peak_8040 69.49 -chr18 36388485 36388844 MACS_peak_4075 60.49 -chr14 35176361 35177236 MACS_peak_2498 90.81 -chr15 58986223 58986533 MACS_peak_3088 76.07 -chr15 38230489 38231164 MACS_peak_3015 108.56 -chr2 26207239 26207961 MACS_peak_4567 93.92 -chr17 31700930 31701353 MACS_peak_3750 79.51 -chr14 69764756 69765167 MACS_peak_2692 66.11 -chr1 82630866 82631590 MACS_peak_222 118.15 -chr13 63463080 63463547 MACS_peak_2222 137.87 -chr9 88333602 88334003 MACS_peak_8442 170.03 -chr1 108414102 108414881 MACS_peak_317 194.11 -chr17 71202093 71202486 MACS_peak_3924 71.93 -chr2 4397930 4398339 MACS_peak_4456 96.07 -chr19 6313436 6313784 MACS_peak_4261 67.80 -chr17 47430218 47430545 MACS_peak_3836 57.96 -chr1 88383641 88384502 MACS_peak_245 82.34 -chr15 99307421 99307832 MACS_peak_3292 54.82 -chr10 87525443 87525844 MACS_peak_870 52.21 -chr1 137560338 137560774 MACS_peak_409 84.01 -chr2 98177089 98177336 MACS_peak_4795 66.43 -chr6 146904425 146904852 MACS_peak_7174 57.52 -chr3 88489494 88489890 MACS_peak_5293 71.68 -chrX 160870684 160870964 MACS_peak_8672 76.86 -chr5 96978763 96979127 MACS_peak_6459 66.50 -chr11 117515601 117516136 MACS_peak_1677 87.38 -chr8 129497368 129498236 MACS_peak_8150 121.81 -chr9 44134590 44135169 MACS_peak_8247 90.62 -chr3 157699307 157699805 MACS_peak_5678 169.27 -chr1 184472944 184473657 MACS_peak_582 98.66 -chr2 165780325 165780779 MACS_peak_5042 95.23 -chr12 52792839 52793385 MACS_peak_1814 54.49 -chr13 23855588 23856068 MACS_peak_2092 126.79 -chr18 83456033 83456433 MACS_peak_4211 68.08 -chr10 14425071 14425602 MACS_peak_620 94.14 -chr13 41582171 41582615 MACS_peak_2143 65.06 -chr10 94786191 94786440 MACS_peak_905 106.55 -chr8 109816960 109817536 MACS_peak_8026 135.33 
-chr12 81878967 81879611 MACS_peak_1912 119.33 -chr7 26059620 26059929 MACS_peak_7250 118.92 -chr4 62176380 62176995 MACS_peak_5799 54.70 -chr18 53436462 53436882 MACS_peak_4104 79.74 -chr11 51502212 51502759 MACS_peak_1164 56.31 -chr19 37281848 37282416 MACS_peak_4379 68.11 -chr16 92710525 92711133 MACS_peak_3535 77.20 -chr3 120874131 120875192 MACS_peak_5483 293.69 -chr13 17696479 17697123 MACS_peak_2068 170.31 -chr19 46077680 46078113 MACS_peak_4423 65.79 -chr5 143682333 143682913 MACS_peak_6736 222.86 -chr1 154568166 154568685 MACS_peak_448 95.23 -chr9 81108308 81108891 MACS_peak_8430 145.29 -chr12 44270399 44270849 MACS_peak_1806 72.54 -chr5 106128761 106129096 MACS_peak_6489 54.49 -chr4 120903868 120904161 MACS_peak_5932 76.86 -chr3 68460731 68461147 MACS_peak_5234 137.12 -chr3 58329271 58329978 MACS_peak_5204 131.98 -chr4 151462216 151462538 MACS_peak_6188 74.40 -chr17 34059082 34059521 MACS_peak_3763 62.02 -chr15 76554091 76554392 MACS_peak_3159 72.02 -chr2 117013160 117013597 MACS_peak_4828 61.25 -chr6 8719505 8720279 MACS_peak_6785 237.86 -chr6 102485635 102486092 MACS_peak_7043 67.00 -chr18 65959921 65960294 MACS_peak_4153 65.80 -chr11 3308916 3309259 MACS_peak_1025 175.63 -chr11 44333429 44333838 MACS_peak_1139 94.32 -chr1 9933866 9934292 MACS_peak_4 151.02 -chr2 49701652 49702023 MACS_peak_4700 86.63 -chr5 147072457 147073005 MACS_peak_6754 70.79 -chr14 21993252 21993662 MACS_peak_2423 117.33 -chr14 76186318 76187158 MACS_peak_2750 90.56 -chr3 139131096 139131484 MACS_peak_5580 50.20 -chr7 26466797 26467478 MACS_peak_7255 106.79 -chr1 123464347 123464745 MACS_peak_335 70.93 -chr7 132615669 132615970 MACS_peak_7605 72.43 -chr10 86169149 86169550 MACS_peak_867 103.08 -chr13 49863349 49863865 MACS_peak_2174 74.46 -chr7 86508733 86509023 MACS_peak_7431 62.85 -chr11 77378344 77378754 MACS_peak_1326 82.92 -chr6 86634390 86634683 MACS_peak_6980 103.74 -chr10 116871908 116872390 MACS_peak_961 57.59 -chr14 41750272 41750682 MACS_peak_2513 95.75 -chr4 151469965 
151470437 MACS_peak_6189 78.61 -chr10 60875630 60876040 MACS_peak_740 67.36 -chr6 91572361 91572723 MACS_peak_7021 70.91 -chr6 128795347 128795755 MACS_peak_7131 94.41 -chr18 58701674 58702163 MACS_peak_4120 87.63 -chr5 110698548 110699046 MACS_peak_6511 143.30 -chrX 93325546 93325822 MACS_peak_8637 91.45 -chr7 134432899 134433190 MACS_peak_7644 106.55 -chr12 29320679 29321102 MACS_peak_1769 54.01 -chr10 20124963 20125579 MACS_peak_639 217.11 -chr3 106416991 106417459 MACS_peak_5432 62.73 -chr10 33739040 33739699 MACS_peak_678 218.19 -chr8 74878095 74878556 MACS_peak_7883 94.87 -chr11 83382803 83383271 MACS_peak_1395 68.88 -chr3 58862746 58863399 MACS_peak_5208 157.00 -chr15 77685670 77686204 MACS_peak_3162 62.73 -chr14 86615475 86615825 MACS_peak_2782 87.58 -chr6 124867137 124867557 MACS_peak_7105 62.38 -chr13 55514580 55515153 MACS_peak_2199 117.94 -chr5 102254699 102255015 MACS_peak_6477 60.56 -chr2 25923796 25924257 MACS_peak_4561 56.91 -chr3 152061055 152061395 MACS_peak_5639 51.22 -chr6 86334798 86335187 MACS_peak_6976 60.17 -chr17 53706687 53707067 MACS_peak_3872 69.57 -chr7 18595991 18596427 MACS_peak_7225 65.59 -chr2 166445552 166446686 MACS_peak_5044 237.97 -chrX 12567874 12568307 MACS_peak_8607 92.18 -chr9 57187655 57188087 MACS_peak_8309 150.82 -chr3 96553930 96554350 MACS_peak_5363 58.24 -chr11 49794755 49795218 MACS_peak_1156 97.10 -chr12 86453801 86454179 MACS_peak_1931 69.72 -chr11 113544835 113545435 MACS_peak_1622 189.84 -chr14 14820991 14821374 MACS_peak_2395 50.82 -chr11 115936845 115937272 MACS_peak_1653 53.75 -chr9 66358765 66359165 MACS_peak_8375 66.29 -chr3 14944703 14945194 MACS_peak_5112 88.33 -chr4 147514611 147514949 MACS_peak_6139 51.38 -chr2 154429029 154429495 MACS_peak_4972 62.73 -chr17 21082204 21082596 MACS_peak_3632 82.04 -chr17 14304400 14304838 MACS_peak_3625 65.46 -chr5 65288018 65288287 MACS_peak_6358 72.51 -chr2 26518695 26519103 MACS_peak_4576 63.21 -chr1 9272967 9273590 MACS_peak_2 182.17 -chr9 79658076 79658784 
MACS_peak_8427 64.31 -chr12 100872379 100872633 MACS_peak_1969 57.15 -chr2 128684574 128684999 MACS_peak_4881 74.42 -chr2 154558527 154559018 MACS_peak_4975 69.69 -chr19 61171106 61171535 MACS_peak_4446 66.05 -chr6 88431783 88432112 MACS_peak_7003 73.77 -chr3 136303454 136303877 MACS_peak_5562 56.77 -chr12 71328424 71329097 MACS_peak_1859 114.45 -chr6 72059443 72059831 MACS_peak_6930 67.17 -chr7 26514419 26514743 MACS_peak_7262 83.43 -chr12 88174557 88175940 MACS_peak_1945 236.16 -chr17 29530544 29530849 MACS_peak_3735 56.94 -chr10 80895647 80896054 MACS_peak_845 153.53 -chr7 82925765 82926126 MACS_peak_7421 129.84 -chr13 105607134 105607556 MACS_peak_2347 50.79 -chr10 19311634 19312299 MACS_peak_631 73.24 -chr11 113667257 113667748 MACS_peak_1624 426.44 -chr17 29301427 29301965 MACS_peak_3729 91.69 -chr3 28680019 28680490 MACS_peak_5135 89.03 -chr12 70548827 70549327 MACS_peak_1849 143.57 -chr9 57758663 57758985 MACS_peak_8322 57.05 -chr12 55963541 55963840 MACS_peak_1831 67.73 -chr10 80814336 80815092 MACS_peak_838 73.53 -chr9 106106639 106107165 MACS_peak_8487 92.77 -chr18 31948500 31948917 MACS_peak_4044 115.28 -chr3 97817136 97817796 MACS_peak_5372 108.56 -chr5 115608262 115608636 MACS_peak_6540 61.95 -chr1 92677004 92677359 MACS_peak_270 56.79 -chr4 154536407 154536875 MACS_peak_6204 179.88 -chr11 78866726 78867449 MACS_peak_1362 160.60 -chr1 58502152 58502677 MACS_peak_120 107.68 -chr1 78654241 78654789 MACS_peak_210 256.08 -chr17 91266249 91266654 MACS_peak_3987 67.72 -chr10 80392804 80393085 MACS_peak_829 60.79 -chr9 123168920 123169217 MACS_peak_8575 89.95 -chr18 64648305 64648906 MACS_peak_4135 50.43 -chr16 45025492 45026112 MACS_peak_3450 84.01 -chr11 120459679 120460020 MACS_peak_1701 71.31 -chr13 19606884 19607473 MACS_peak_2072 72.34 -chr7 134324277 134324565 MACS_peak_7637 91.45 -chr12 32908423 32908906 MACS_peak_1781 89.00 -chr15 24842100 24842543 MACS_peak_2955 75.93 -chr18 6489913 6490350 MACS_peak_4007 97.14 -chr14 122247568 122247901 
MACS_peak_2881 54.94 -chr10 98856446 98856735 MACS_peak_921 64.17 -chr1 182741146 182741385 MACS_peak_566 91.45 -chr4 45045503 45045718 MACS_peak_5770 81.78 -chr1 95531960 95532820 MACS_peak_290 140.79 -chr2 152643374 152643779 MACS_peak_4954 153.80 -chr10 79317823 79318310 MACS_peak_795 114.99 -chr5 106146989 106147692 MACS_peak_6490 267.90 -chr4 34634278 34634904 MACS_peak_5728 116.61
--- a/chipsequtil-master/examples/nib/test_batch_fasta.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,9 +0,0 @@ -from chipsequtil import get_org_settings, BEDFile -from chipsequtil.nib import NibDB -from pprint import pprint - -genome_dir = get_org_settings('mm9')['genome_dir'] -db = NibDB(nib_dirs=[genome_dir]) -fasta_headers,seqs = db.get_fasta_from_bed('shuffled_peaks.bed') - -pprint(seqs[:10])
--- a/chipsequtil-master/examples/nib/test_nib_db.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,47 +0,0 @@ -from chipsequtil import get_org_settings, BEDFile -from chipsequtil.nib import NibDB -from pprint import pprint - -# see `org_settings.py -h` for more info on get_org_settings(<organism>) function -genome_dir = get_org_settings('mm9')['genome_dir'] - -# NibDB is an interface to a collection of nib files, typically corresponding -# to chromosomes of a genome - -# example with only one nib file -print 'NibDB with a single nib file' -db = NibDB(nib_fns=[genome_dir+'/chr1.nib']) - -print 'NibDB info:' -pprint(dict(db.db_info)) - -# get a fasta record for some sequence -print 'Example fasta record: chr1:1e8-1e8+100' -print db.get_fasta('chr1',1e8,1e8+100) - -# get just the sequence -print 'Same example, only sequence:' -print db.get_seq('chr1',1e8,1e8+100) -print - - -# example with a directory of nib files -print 'NibDB with a directory of nib files' -db = NibDB(nib_dirs=[genome_dir]) - -# get a fasta record for some sequence -print 'Example fasta record: chr1:1e8-1e8+100' -print db.get_fasta('chr1',1e8,1e8+100) - -print 'Example fasta record: chr1:1e8-1e8+100' -print db.get_fasta('chr2',1e8,1e8+100) - -print 'Example fasta record: chr1:1e8-1e8+100' -print db.get_fasta('chrX',1e8,1e8+100) - - -# example of fetching all sequences from a bed file -fasta_headers,seqs = db.get_fasta_from_bed('shuffled_peaks.bed') - -print 'Num. peaks:',len(open('shuffled_peaks.bed').readlines()) -pprint(seqs[:10])
--- a/chipsequtil-master/examples/seq/test_chipsequtil_seq.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,19 +0,0 @@ -from StringIO import StringIO -from chipsequtil.seq import FASTAFile, FASTQFile - -fasta_str = StringIO(">seq1\nACATAGGGAT\n>seq2\nTTATNTAGATA\n") -fasta_f = FASTAFile(fasta_str) -print fasta_f.headers - -print "[r for r in fasta_f]", [r for r in fasta_f] -print "fasta_f['seq1']", fasta_f['seq1'] -print "fasta_f.headers", fasta_f.headers -print "fasta_f.sequences", fasta_f.sequences - -fastq_str = StringIO("@seq1\nACATAGGGAT\n+seq2\nY^_cccQYJQ\n@seq2\nTTATNTAGATA\n+seq2\nY^_cJcQQJQ") -fastq_f = FASTQFile(fastq_str) -print "[r for r in fastq_f]", [r for r in fastq_f] -print "fastq_f['seq1']", fastq_f['seq1'] -print "fastq_f.headers", fastq_f.headers -print "fastq_f.sequences", fastq_f.sequences -print "fastq_f.quals", fastq_f.quals
--- a/chipsequtil-master/scripts/THEME.sh Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,177 +0,0 @@ -#!/bin/bash - -THEME_EXE=/nfs/data/cwng/archive/cvEM.64/THEME_edit.py - -OPT_SPEC=' -{ -"NAME": "THEME.sh", -"DESC": "Run old THEME version", -"ARGS": ["FG_FASTA","BG_FASTA","HYP_FN","MARKOV"], -"OPTS": { - "CV":{"LONG":"--cv","DEFAULT":5,"TYPE":"int","HELP":"number of cross validation folds [default:%default]"}, - "NOREFINE":{"LONG":"--no-refine","ACTION":"store_true","HELP":"do not run with refinement"}, - "BETA":{"LONG":"--beta","DEFAULT":0.7,"TYPE":"float","HELP":"beta parameter to use [default:%default]"}, - "DELTA":{"LONG":"--delta","DEFAULT":0.001,"TYPE":"float","HELP":"delta parameter to use [default:%default]"}, - "RANDOMIZE":{"LONG":"--randomization","ACTION":"store_true","HELP":"run randomization"}, - "MOTIF_FN":{"LONG":"--motif-file","DEFAULT":"dummy.out","HELP":"filename to write motif results to [default:%default]"}, - "OUTPUT_FN":{"LONG":"--output-filename","DEFAULT":"dummy.txt","HELP":"filename to write motif results to [default:%default]"}, - "RANDOM_FN":{"LONG":"--random-output","DEFAULT":"random.txt","HELP":"filename to write motif results to [default:%default]"}, - "DUMP":{"LONG":"--dump","ACTION":"store_true","HELP":"dump categtories to file"}, - "REM_COM":{"LONG":"--remove-common","ACTION":"store_true","HELP":"remove common sequences from analysis"}, - "NOPARALLEL":{"LONG":"--no-parallelize","ACTION":"store_true","HELP":"do not use wqsub.py for parallelization"}, - "INTERACTIVE":{"LONG":"--interactive","ACTION":"store_true","HELP":"run the script interactively"}, - "HYP_INDS":{"LONG":"--hyp-indices","DEFAULT":"ALL","HELP":"0-based indices of hypotheses to run [default: %default]"}, - "VERBOSE":{"SHORT":"-v","LONG":"--verbose","ACTION":"store_true","HELP":"print out the commands that are being run"}, - "TRIALS":{"LONG":"--trials","HELP":"this option is here only for backwards compatibility with THEME.py"} - } 
-}' -OUTPUT=$(echo $OPT_SPEC | getopts.py --shell=bash -- $@) -GETOPTS_RET=$? -if [ $GETOPTS_RET -ne 0 ]; then - exit 1 -fi -$OUTPUT - -INTERACTIVE_FLAG="--auto" -if [ $INTERACTIVE != "None" ]; then - INTERACTIVE_FLAG= -fi - -eval "$(steplist.py $INTERACTIVE_FLAG -t "Run THEME" THEME "Wait for jobs" "Combine results")" - -# run THEME -OUTDIR=THEME_data -test \! -e $OUTDIR && mkdir $OUTDIR - -WQSUB_EXE="wqsub.py" -if [ $NOPARALLEL != "None" ]; then - WQSUB_EXE= -fi - -RANDOMIZE_FLAG= -if [ $RANDOMIZE != "None" ]; then - RANDOMIZE_FLAG="-randomization" -fi - -RC= -if [ $RC ]; then - RC='-rc' -fi - -if [ $HYP_INDS != "ALL" ]; then - HYP_INDS=$(parse_steplist.py $HYP_INDS) - HYP_INDS_STATUS=$? - if [ $HYP_INDS_STATUS != 0 ]; then - echo "Incorrectly formatted argument to --hyp-indices option, aborting" - exit $HYP_INDS_STATUS - fi -else - NUM_HYPS=`grep -c '^Source' $HYP_FN` - NUM_HYPS=$(($NUM_HYPS-1)) - HYP_INDS=$(seq 0 $NUM_HYPS) -fi - -JOBIDS= -next_step && \ -for i in $HYP_INDS -do - - WQSUB= - REDIRECT= - if [ ! -z $WQSUB_EXE ]; then - WQSUB="$WQSUB_EXE --wqsub-name=THEME_$i" - fi - - OUTPRE=$OUTDIR/$i - - CMD="$WQSUB python $THEME_EXE $FG_FASTA $BG_FASTA $i \ - -fse $HYP_FN -markov $MARKOV -cv $CV -beta $BETA \ - -delta $DELTA -motif_file $OUTPRE.tamo -out_file $OUTPRE.txt \ - $RC" - JOBID=$($WQSUB $CMD) - JOBIDS="$JOBID $JOBIDS" - if [ $VERBOSE != "None" ]; then - echo $WQSUB $CMD - fi - - if [ $RANDOMIZE != "None" ]; then - - WQSUB="$WQSUB_EXE --wqsub-name=THEME_rand_$i" - - CMD="$WQSUB python $THEME_EXE $FG_FASTA $BG_FASTA $i \ - -fse $HYP_FN -markov $MARKOV -cv $CV -beta $BETA \ - -delta $DELTA -out_file ${OUTPRE}_rand_output.txt \ - -random_file ${OUTPRE}_rand.txt $RC -randomization" - - JOBID=$($WQSUB $CMD) - JOBIDS="$JOBID $JOBIDS" - - if [ $VERBOSE != "None" ]; then - echo $WQSUB $CMD -randomization - fi - fi - -done - - -# wait for jobs -next_step && wait_for_jobid.py $JOBIDS - -# compile results -next_step -DO_COMPILE=$? 
-if [ $DO_COMPILE == 0 ]; then - - rm -f $MOTIF_FN && touch $MOTIF_FN - ( - cd $OUTDIR - ls *.tamo | sort -n | xargs -n1 -I{} -t cat {} >> ../$MOTIF_FN - ) - - if [ $NOPARALLEL == "None" ]; then - mv -f *.{err,out} THEME_data - fi - - if [ $RANDOMIZE != "None" ]; then - rm -f $RANDOM_FN && touch $RANDOM_FN - ( - cd $OUTDIR - for ind in $HYP_INDS - do - out_fn="${ind}_rand.txt" - echo "Consolidating $out_fn" - python >> ../$RANDOM_FN << EOF -import re -import sys - -from TAMO.MotifTools import load - -ind = re.match('(\d+)',"$out_fn").group(1) - -motif = load("$HYP_FN")[int(ind)] - -src = motif.source.split() -if len(src) == 0 : - print 'Got weird motif source: %s\n'%src -src = src[0]+'_%s'%ind - -mot_str = str(motif) - -cverrs = [] -for l in open("$out_fn") : - m = re.match("trial: \d+ mean test error: (\d+\.\d+)$",l) - if m is not None : - cverrs.append(float(m.group(1))) - -print "\t".join([src,mot_str,str(sum(cverrs)/len(cverrs)),repr(cverrs)]) -sys.stdout.flush() - -EOF - done - - ) - - compile_THEME_results.py $MOTIF_FN $RANDOM_FN --output=$OUTPUT_FN - - fi -fi
--- a/chipsequtil-master/scripts/build_chipseq_infosite.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,675 +0,0 @@ -#!/usr/bin/env python - -import getpass -import glob -import json -import matplotlib -matplotlib.use('AGG') -import matplotlib.pyplot as mp -import os -import re -import shutil -import sys - -from collections import defaultdict -from csv import reader, writer, DictReader -from math import log -from optparse import OptionParser -from subprocess import call - -from chipsequtil import MACSFile, get_org_settings -from reStUtil import * - -usage = '%prog [options] [<peak filename> <peak filename> ...]' -parser = OptionParser(usage=usage) -parser.add_option('-d','--dir',dest='dir',default='.',help='Source directory [default: %default]') -parser.add_option('-n','--name',dest='name',help='Experiment name [default: current directory name]') -parser.add_option('--skip-motif-scan',dest='skip_motif_scan',action='store_true',help="skip motif_scan.py, but still build motifs into document (assumes motif_scan.py was previously run)") -parser.add_option('--skip-motif-stuff',dest='skip_motif_stuff',action='store_true',help="motif stuff takes a long time, manually skip it if no motif results are available or you don't care about them") - -{ - "experiment path": "/nfs/antdata/analysis/100809_P/100809_St7-10ul-p300_degenhar_Fraenkel_L2_mm9_sorted.bed", - "analysis path": "/net/ventral/nfs/people/labadorf/analysis/100809_P_St7_10ul", - "stage url": "http://fraenkel.mit.edu/stage/labadorf", - "peak files": { - "100809_P_St7_10ul_mfold10,30_pval1e-5": { - "total tags in control": 9331149, - "total tags in treatment": 10064908, - "Range for calculating regional lambda": "1000 bps and 10000 bps", - "tag size": 35, - "name": "100809_P_St7_10ul_mfold10,30_pval1e-5", - "model fold": "10,30", - "format": "BED", - "tags after filtering in treatment": 5099883, - "band width": 150, - "Redundant rate in control": 0.40999999999999998, - "Redundant 
rate in treatment": 0.48999999999999999, - "effective genome size": 2110000000.0, - "d": 145, - "maximum duplicate tags at the same position in control": 1, - "control file": "cntrl_6-3_sorted_filterbed.txt", - "MACS version": "1.4.0beta", - "ChIP-seq file": "exp_100809_St7-10ul-p300_degenhar_Fraenkel_L2_mm9_sorted.bed", - "tags after filtering in control": 5481613, - "maximum duplicate tags at the same position in treatment": 2, - "pvalue cutoff": 1.0000000000000001e-05 - } - }, - "format": "BED", - "FDR filter": "none", - "experiment name": "100809_P_St7_10ul", - "mapping type": "TSS", - "pipeline args": { - "--filter-peaks-args": "--sort-by=pvalue --top=200", - "--macs-args": "--mfold=10,30 --tsize=35 --bw=150 --format=BED --pvalue=1e-5", - "--map-args": "--tss --upstream-window=10000 --downstream-window=10000" - }, - "org": "mm9", - "control path": "/nfs/antdata/analysis/090828_42JVC/6-3/6-3_sorted_filterbed.txt", - "mapping window": [ - "10000", - "10000" - ], - "peaks used by THEME": "200", - "stage_dir": "/nfs/antdata/web_stage/labadorf" -} - -if __name__ == '__main__' : - - opts, args = parser.parse_args(sys.argv[1:]) - - exp_dir = os.path.abspath(opts.dir) - exp_name = opts.name if opts.name is not None else os.path.basename(exp_dir) - - # 1. find the param JSON file - param_json_fn = glob.glob('*params.json') - if len(param_json_fn) == 0 : - sys.stderr.write('Could not find parameter file, building one as best I can\n') - curr_user = getpass.getuser() - json_d = {'analysis path':os.getcwd(), - 'stage url':'http://fraenkel.mit.edu/stage/'+curr_user, - 'stage dir':'/nfs/antdata/web_stage/'+curr_user - } - else : - if len(param_json_fn) > 1 : - sys.stderr.write('Found more than one parameter file, picking the first one: %s\n'%','.join(param_json_fn)) - param_json_fn = param_json_fn[0] - json_d = json.load(open(param_json_fn)) - - # 2. 
make a new directory to save all the stuff - infosite_dir_name = exp_name+'_infosite' - infosite_path = os.path.join(os.getcwd(),infosite_dir_name) - if not os.path.exists(infosite_path) : - os.mkdir(infosite_path) - - infosite_img_path = os.path.join(infosite_path,'images') - if not os.path.exists(infosite_img_path) : - os.mkdir(infosite_img_path) - - # 3. setup web staging directory - stage_dir_path = os.path.join(json_d['stage dir'],infosite_dir_name) - if not os.path.exists(stage_dir_path) : - os.symlink(infosite_path,stage_dir_path) - - # 4. get the peaks files stats, don't want negative peaks - if len(args) == 0 : - peaks_fns = glob.glob('*_peaks.xls') - peaks_fns = filter(lambda x: 'negative' not in x,peaks_fns) - else : - peaks_fns = args - analysis_sets = [] - peak_json = json_d['peak files'] = {} - - # analyze all the peak files - for peak_fn in peaks_fns : - print 'processing:',peak_fn - macs_f = MACSFile(peak_fn) - peak_json[peak_fn] = macs_f.file_info - - # positive peaks - peak_stats = defaultdict(list) - num_peaks = 0 - pos_chr_dist = defaultdict(int) - for peak in macs_f : - pos_chr_dist[peak['chr']] += 1 - peak_stats['length'].append(peak['length']) - peak_stats['tags'].append(peak['tags']) - peak_stats['pvalue'].append(peak['-10*log10(pvalue)']) - peak_stats['fold_enrichment'].append(peak['fold_enrichment']) - peak_stats['fdr'].append(peak['FDR(%)']) - num_peaks += 1 - - peak_json[peak_fn]['positive peaks'] = num_peaks - peak_json[peak_fn]['reads under peaks'] = sum(peak_stats['tags']) - - # extract paired peaks info out of output.txt - output_fn = peak_json[peak_fn]['name']+'_output.txt' - output_regexes = ('#2 number of (paired peaks): (\d+)',) - for l in open(output_fn) : - for regex in output_regexes : - m = re.search(regex,l) - if m is not None : - peak_json[peak_fn][m.group(1)] = int(m.group(2)) - - # do the negative peaks - # negative peak file is now filtered - neg_peak_fns = glob.glob(peak_json[peak_fn]['name']+'_negative_peaks_*.xls') - 
- #TODO - do check for file exists - if neg_peak_fns : - neg_peak_fn = neg_peak_fns[0] - neg_peak_f = MACSFile(neg_peak_fn) - - neg_peak_stats = defaultdict(list) - num_peaks = 0 - neg_chr_dist = defaultdict(int) - for peak in neg_peak_f : - neg_chr_dist[peak['chr']] += 1 - neg_peak_stats['length'].append(peak['length']) - neg_peak_stats['tags'].append(peak['tags']) - neg_peak_stats['pvalue'].append(peak['-10*log10(pvalue)']) - neg_peak_stats['fold_enrichment'].append(peak['fold_enrichment']) - neg_peak_stats['fdr'].append(peak['FDR(%)']) - num_peaks += 1 - - peak_json[peak_fn]['negative peaks'] = num_peaks - peak_json[peak_fn]['reads under negative peaks'] = sum(peak_stats['tags']) - else : - peak_json[peak_fn]['negative peaks'] = 'NA' - peak_json[peak_fn]['reads under negative peaks'] = 'NA' - - # save the track lines - ucsc_track_fn = peak_json[peak_fn]['name']+'_MACS_wiggle_tracks.txt' - if os.path.exists(ucsc_track_fn) : - peak_json[peak_fn]['ucsc tracks'] = open(ucsc_track_fn).readlines() - - font = {'size':'9'} - mp.rc('font',**font) - - figsize = (3.5,3.5) - subplots_sizes = {'top':0.8,'left':0.15,'right':0.95} - hist_labels = ('+ peaks','- peaks') - # create histograms for each of the attributes - len_hist_name = macs_f.file_info['name']+'_length.png' - len_hist_fn = os.path.join(infosite_img_path,len_hist_name) - len_hist_url = json_d['stage url']+'/'+infosite_dir_name+'/images/'+len_hist_name - peak_json[peak_fn]['length distribution url'] = len_hist_url - mp.figure(figsize=figsize) - mp.subplots_adjust(**subplots_sizes) - mp.hist((peak_stats['length'],neg_peak_stats['length']),label=hist_labels,bins=20,log=True) - mp.title('%s\npeak length distribution'%macs_f.file_info['name']) - mp.xlabel('peak length') - mp.ylabel('# peaks') - mp.legend() - mp.savefig(len_hist_fn) - mp.clf() - - tags_hist_name = macs_f.file_info['name']+'_tags.png' - tags_hist_fn = os.path.join(infosite_img_path,tags_hist_name) - tags_hist_url = json_d['stage 
url']+'/'+infosite_dir_name+'/images/'+tags_hist_name - peak_json[peak_fn]['tag distribution url'] = tags_hist_url - mp.figure(figsize=figsize) - mp.subplots_adjust(**subplots_sizes) - mp.hist((peak_stats['tags'],neg_peak_stats['tags']),label=hist_labels,bins=20,log=True) - mp.title('%s\npeak tag count distribution'%macs_f.file_info['name']) - mp.xlabel('# tags') - mp.ylabel('# peaks') - mp.legend() - mp.savefig(tags_hist_fn) - mp.clf() - - pval_hist_name = macs_f.file_info['name']+'_pval.png' - pval_hist_fn = os.path.join(infosite_img_path,pval_hist_name) - pval_hist_url = json_d['stage url']+'/'+infosite_dir_name+'/images/'+pval_hist_name - peak_json[peak_fn]['pvalue distribution url'] = pval_hist_url - mp.figure(figsize=figsize) - mp.subplots_adjust(**subplots_sizes) - mp.hist((peak_stats['pvalue'],neg_peak_stats['pvalue']),label=hist_labels,bins=20,log=True) - mp.title('%s\npeak -10*log10(p-valuek) distribution'%macs_f.file_info['name']) - mp.xlabel('-10*log10(p-value)') - mp.ylabel('# peaks') - mp.legend() - mp.savefig(pval_hist_fn) - mp.clf() - - fold_hist_name = macs_f.file_info['name']+'_fold.png' - fold_hist_fn = os.path.join(infosite_img_path,fold_hist_name) - fold_hist_url = json_d['stage url']+'/'+infosite_dir_name+'/images/'+fold_hist_name - peak_json[peak_fn]['fold distribution url'] = fold_hist_url - mp.figure(figsize=figsize) - mp.subplots_adjust(**subplots_sizes) - mp.hist((peak_stats['fold_enrichment'],neg_peak_stats['fold_enrichment']),label=hist_labels,bins=20,log=True) - mp.title('%s\npeak fold enrichment distribution'%macs_f.file_info['name']) - mp.xlabel('fold enrichment') - mp.ylabel('# peaks') - mp.legend() - mp.savefig(fold_hist_fn) - mp.clf() - - fdr_hist_name = macs_f.file_info['name']+'_fdr.png' - fdr_hist_fn = os.path.join(infosite_img_path,fdr_hist_name) - fdr_hist_url = json_d['stage url']+'/'+infosite_dir_name+'/images/'+fdr_hist_name - peak_json[peak_fn]['fdr distribution url'] = fdr_hist_url - mp.figure(figsize=figsize) - 
mp.subplots_adjust(**subplots_sizes) - mp.hist(peak_stats['fdr'],label=hist_labels[0],bins=20,log=True) - mp.title('%s\npeak fdr distribution'%macs_f.file_info['name']) - mp.xlabel('fdr') - mp.ylabel('# peaks') - mp.legend() - mp.savefig(fdr_hist_fn) - mp.clf() - - chr_dist_name = macs_f.file_info['name']+'_chr_dist.png' - chr_dist_fn = os.path.join(infosite_img_path,chr_dist_name) - chr_dist_url = json_d['stage url']+'/'+infosite_dir_name+'/images/'+chr_dist_name - peak_json[peak_fn]['chr distribution url'] = chr_dist_url - chromos = [] - if json_d.has_key('org') : - chr_sizes_fn = get_org_settings(json_d['org'])['ucsc_chrom_sizes'] - chromos = [r[0] for r in reader(open(chr_sizes_fn),delimiter='\t')] - else : - chromos = list(set(pos_chr_dist.keys()).union(neg_chr_dist.keys())) - standard_chromos = filter(lambda x: re.search('^chr[0-9MXY]+$',x) is not None,chromos) - - # hack chrM, chrX and chrY so they sort right - if 'chrM' in standard_chromos : - standard_chromos[standard_chromos.index('chrM')] = 'chr100' - if 'chrX' in standard_chromos : - standard_chromos[standard_chromos.index('chrX')] = 'chr101' - if 'chrY' in standard_chromos : - standard_chromos[standard_chromos.index('chrY')] = 'chr102' - - standard_chromos.sort(key=lambda x: int(x.replace('chr',''))) - - # unhack chrM, chrX and chrY so they display right - if 'chr100' in standard_chromos : - standard_chromos[standard_chromos.index('chr100')] = 'chrM' - if 'chr101' in standard_chromos : - standard_chromos[standard_chromos.index('chr101')] = 'chrX' - if 'chr102' in standard_chromos : - standard_chromos[standard_chromos.index('chr102')] = 'chrY' - - other_chromos = filter(lambda x: re.search('^chr[0-9MXY]+$',x) is None,chromos) - - pos_plot_chr_dist = defaultdict(int) - neg_plot_chr_dist = defaultdict(int) - for chrom in standard_chromos : - pos_plot_chr_dist[chrom] += pos_chr_dist.get(chrom,0) - neg_plot_chr_dist[chrom] += neg_chr_dist.get(chrom,0) - for chrom in other_chromos : - 
pos_plot_chr_dist['Other'] += pos_chr_dist.get(chrom,0) - neg_plot_chr_dist['Other'] += neg_chr_dist.get(chrom,0) - chromos.append('Other') - mp.figure(figsize=figsize) - mp.subplots_adjust(bottom=0.18,**subplots_sizes) - mp.bar(range(len(chromos)), - [pos_plot_chr_dist[k] for k in chromos], - width=0.45, - color='b', - label='Positive' - ) - mp.bar([x+0.45 for x in range(len(chromos))], - [neg_plot_chr_dist[k] for k in chromos], - width=0.45, - color='g', - label='Negative' - ) - mp.xticks([x+0.45 for x in range(len(chromos))],chromos,rotation=90) - mp.title('%s\nPeaks by chromosome'%macs_f.file_info['name']) - mp.xlabel('Chromosome') - mp.ylabel('# peaks') - mp.legend() - mp.savefig(chr_dist_fn) - mp.clf() - - # pos vs neg peaks - pos_v_neg_name = '%s_pos_v_neg.png'%macs_f.file_info['name'] - pos_v_neg_fn = os.path.join(infosite_img_path,pos_v_neg_name) - pos_v_neg_url = json_d['stage url']+'/'+infosite_dir_name+'/images/'+pos_v_neg_name - peak_json[peak_fn]['pos v neg url'] = pos_v_neg_url - cmd = 'plot_pos_vs_neg_peaks.py --output=%s %s %s'%(pos_v_neg_fn,peak_fn, neg_peak_fn) - sys.stderr.write(cmd+'\n') - r = call(cmd,shell=True) - - # motif stuff - if opts.skip_motif_scan or opts.skip_motif_stuff : - sys.stderr.write('Obediently skipping motif stuff\n') - else : - # not exactly sure the best way to find the filtered macs file yet, - # just take the .xls file with the longest filename? 
- filtered_peak_fns = glob.glob('%s_peaks_*'%macs_f.file_info['name']) - filtered_peak_fns.sort(key=lambda x: len(x),reverse=True) - filtered_peak_fn = filtered_peak_fns[0] - - motif_results_fns = glob.glob('%s_motifs_beta*_cv*[0-9].tamo'%macs_f.file_info['name']) - motif_results_fn = motif_results_fns[0] - #TODO - do check for file exists - - # motif_scan.py <org> <peak fn> <TAMO motif fn> - fixed_peak_width = '' - if json_d['fixed peak width'] != 'none' : - fixed_peak_width = '--fixed-peak-width=%s'%json_d['fixed peak width'] - - cmd = 'motif_scan.py %s --dir=%s/images/ %s %s %s' - cmd = cmd%(fixed_peak_width,infosite_dir_name,json_d['org'],filtered_peak_fn,motif_results_fn) - sys.stderr.write(cmd+'\n') - call(cmd,shell=True) - - # pot_peaks_vs_motifs.py <peaks fn> <seq score fn> <bg score fn> - - - # 5. build reSt document - reSt_fn = exp_name+'_info.rst' - reSt_path = os.path.join(infosite_path,reSt_fn) - reSt_html_name = exp_name+'_info.html' - reSt_html_path = os.path.join(infosite_path,reSt_html_name) - reSt_url = json_d['stage url'] + '/' + infosite_dir_name + '/' + reSt_html_name - doc = ReStDocument(reSt_path) - doc.add(ReStSection("Infopage for %s"%exp_name)) - - # basic experiment stats table - ident = lambda x: x or 'unknown' - stat_key_labels_fmts = [ - ('org','Organism',ident), - ('analysis path','Analysis Path',ident), - ('experiment path','Experiment Path',ident), - ('control path','Control Path',ident), - ('format','Read Format',ident), - ('FDR filter','FDR filter',ident), - ('mapping type','Gene Mapping Type',ident), - ('mapping window','Gene Mapping Window',lambda x: x and '-%s,%s'%tuple(x)), - ('peaks used by THEME','Peaks used by THEME',ident) - ] - stat_rows = [('**%s**'%label, fmt(json_d.get(key))) for key,label,fmt in stat_key_labels_fmts] - doc.add(ReStSimpleTable(None,stat_rows)) - - doc.add(ReStSection('MACS Peak File Stats',level=2)) - - # go through peak files - peak_recs = json_d['peak files'] - fl_str = lambda x: x and 
'%.2g'%float(x) - stat_key_labels_fmts = [ - ('paired peaks','*paired peaks*',ident), - ('positive peaks','*positive peaks*',ident), - ('negative peaks','*negative peaks*',ident), - ('reads under peaks','*reads under positive peaks*',ident), - ('total tags in treatment','*Treatment Tags*',ident), - ('tags after filtering in treatment','after filtering',ident), - ('Redundant rate in treatment','redunancy rate',fl_str), - ('maximum duplicate tags at the same position in treatment','max dup. tags',ident), - ('total tags in control','*Control Tags*',ident), - ('tags after filtering in control','after filtering',ident), - ('Redundant rate in control','redunancy rate',fl_str), - ('maximum duplicate tags at the same position in control','max dup. tags',ident), - ('peak tag count filter','*Minimum peak tag count*',ident), - ('d','*MACS d*',ident), - ('band width','*band width*',ident), - ('MACS version','*MACS version*',ident), - ('pvalue cutoff','*p-value cutoff*',lambda x: '1e%d'%int(log(x,10))), - ] - - for peak_fn,peak_stats in peak_recs.items() : - - # add the new section and stats table - doc.add(ReStSection(peak_fn,level=3)) - stat_rows = [('*%s*'%label, fmt(peak_stats.get(key))) for key,label,fmt in stat_key_labels_fmts] - doc.add(ReStSimpleTable(None,stat_rows)) - - # link to the peaks file - peak_infosite_name = os.path.join(infosite_dir_name,peak_fn) - peak_infosite_path = os.path.abspath(peak_infosite_name) - peak_infosite_url = json_d['stage url'] + '/' + peak_infosite_name - call('cp %s %s'%(peak_fn,os.path.join(infosite_dir_name,peak_fn)),shell=True) - doc.add(ReStSimpleTable(None,[('**MACS Peaks File**','`%s`_'%peak_infosite_url)])) - doc.add(ReStHyperlink(peak_infosite_url,url=peak_infosite_url)) - - # UCSC track info - if peak_stats.has_key('ucsc tracks') : - ucsc_tbl = ReStSimpleTable(('**UCSC Genome Browser Track Lines**',), - [[x] for x in peak_stats['ucsc tracks']]) - doc.add(ucsc_tbl) - else : - doc.add(ReStSimpleTable(None,[['UCSC integration was 
not enabled for this experiment']])) - - # peak quality plots - img_tbl1 = ReStSimpleTable(None, [ - [ - ReStImage(peak_stats['pos v neg url'],options={'width':'600px','align':'center'}), - ] - ] - ) - doc.add(img_tbl1) - - img_tbl2 = ReStSimpleTable(None, [ - [ - ReStImage(peak_stats['length distribution url'],options={'width':'250px','align':'center'}), - ReStImage(peak_stats['tag distribution url'],options={'width':'250px','align':'center'}), - ReStImage(peak_stats['pvalue distribution url'],options={'width':'250px','align':'center'}) - ], - [ - ReStImage(peak_stats['fold distribution url'],options={'width':'250px','align':'center'}), - ReStImage(peak_stats['fdr distribution url'],options={'width':'250px','align':'center'}), - ReStImage(peak_stats['chr distribution url'],options={'width':'250px','align':'center'}) - ] - ] - ) - doc.add(img_tbl2) - - # gene info - gene_fn = peak_stats['name']+'_genes.txt' - gene_link = os.path.join(infosite_dir_name,gene_fn) - if not os.path.exists(gene_link) : - shutil.copyfile(gene_fn,gene_link) - gene_url = json_d['stage url']+'/'+gene_link - - # gather other gene mapping stats - # knownGeneID - # geneSymbol - # chr - # start - # end - # length - # summit - # tags - # -10*log10(pvalue) - # fold_enrichment - # FDR(%) - # peak - # loc - # dist - # from - # feature - # score - # map - # type - # map - # subtype - - gene_reader = DictReader(open(gene_fn),delimiter='\t') - gene_stats = defaultdict(set) - gene_pvals = defaultdict(float) - for rec in gene_reader : - gene_stats['num knownGenes'].add(rec['knownGeneID']) - gene_stats['num geneSymbols'].add(rec['geneSymbol']) - gene_pvals[rec['geneSymbol']] = max(gene_pvals[rec['geneSymbol']],float(rec['-10*log10(pvalue)'])) - gene_pvals = gene_pvals.items() - gene_pvals.sort(key=lambda x: x[1],reverse=True) - for k,v in gene_pvals[:20]: - print k,v - gene_mapping_data = [('**# knownGenes mapped**',len(gene_stats['num knownGenes'])), - ('**# gene symbols mapped**',len(gene_stats['num 
geneSymbols'])), - ('**Top 10 gene symbols**',','.join([x[0] for x in gene_pvals[:10]])), - ('**All gene mappings**','`%s`_'%gene_url) - ] - - # plots from plot_peak_loc_dist.py - gene_pie_name = exp_name+'_gene_map.png' - peak_pie_name = exp_name+'_peak_map.png' - hist_name = exp_name+'_peak_dist.png' - pval_bar_name = exp_name+'_pval_bar.png' - peak_loc_d = {'out_dir':infosite_path, - 'gene_pie_fn':os.path.join(infosite_path,'images',gene_pie_name), - 'peak_pie_fn':os.path.join(infosite_path,'images',peak_pie_name), - 'pval_bar_fn':os.path.join(infosite_path,'images',pval_bar_name), - 'hist_fn':os.path.join(infosite_path,'images',hist_name), - 'peak_fn':peak_fn, - 'gene_name':gene_fn - } - cmd = 'plot_peak_loc_dist.py --save -d %(out_dir)s -g %(gene_pie_fn)s ' \ - '-p %(peak_pie_fn)s -f %(hist_fn)s -b %(pval_bar_fn)s ' \ - '%(peak_fn)s %(gene_name)s' - sys.stderr.write(cmd%peak_loc_d+'\n') - call(cmd%peak_loc_d,shell=True) - peak_stats['gene map url'] = json_d['stage url']+'/'+infosite_dir_name+'/images/'+gene_pie_name - peak_stats['peak map url'] = json_d['stage url']+'/'+infosite_dir_name+'/images/'+peak_pie_name - peak_stats['pval bar url'] = json_d['stage url']+'/'+infosite_dir_name+'/images/'+pval_bar_name - peak_stats['dist url'] = json_d['stage url']+'/'+infosite_dir_name+'/images/'+hist_name - - # make links to the different peaks files - feature_patts = ('promoter.txt','gene_exon.txt','gene_intron.txt','after.txt','intergenic.xls') - feature_data = [] - feature_urls = [] - - for patt in feature_patts : - feature_fn = '%s_*_%s'%(peak_stats['name'],patt) - feature_path = glob.glob(os.path.join(infosite_dir_name,feature_fn)) - if len(feature_path) == 0 : - sys.stderr.write('Warning: %s could not be found, skipping feature type\n'%os.path.join(infosite_dir_name,feature_fn)) - continue - feature_path = feature_path[0] - feature_url = json_d['stage url']+'/'+feature_path - - # create UCSC formatted versions of the files - if patt.endswith('.txt') : # these 
have gene columns - feature_type = patt.replace('.txt','') - ucsc_feature_fn = feature_fn.replace('.txt','_ucsc.txt') - st,en = 2,4 - elif patt.endswith('.xls') : - feature_type = patt.replace('.xls','') - ucsc_feature_fn = feature_fn.replace('.xls','_ucsc.xls') - st,en = 0,2 - - ucsc_feature_path = os.path.join(infosite_dir_name,ucsc_feature_fn) - ucsc_feature_f = open(ucsc_feature_path,'w') - ucsc_feature_writer = writer(ucsc_feature_f,delimiter='\t') - for l in reader(open(feature_path),delimiter='\t') : - rec = l[0:st] + \ - ['%s:%s-%s'%tuple(l[st:en+1])] + \ - l[en+1:] - ucsc_feature_writer.writerow(rec) - ucsc_feature_f.close() - - ucsc_feature_url = json_d['stage url']+'/'+ucsc_feature_path - - feature_data.append(('**%s peaks**'%feature_type,'`%s`_ `UCSC %s`_'%(feature_url,feature_type))) - feature_urls.append(ReStHyperlink(feature_url,url=feature_url)) - feature_urls.append(ReStHyperlink('UCSC %s'%feature_type,url=ucsc_feature_url)) - - gene_mapping_data.extend(feature_data) - feat_tbl = ReStSimpleTable(('**Gene mapping data**',''),gene_mapping_data) - doc.add(feat_tbl) - doc.add(ReStHyperlink(gene_url,url=gene_url)) - for url in feature_urls : - doc.add(url) - - img_tbl3 = ReStSimpleTable(None, [ - [ - ReStImage(peak_stats['gene map url'],options={'align':'center'}), - ReStImage(peak_stats['peak map url'],options={'align':'center'}) - ], - [ - ReStImage(peak_stats['pval bar url'],options={'align':'center'}), - ReStImage(peak_stats['dist url'],options={'align':'center'}) - ] - ] - ) - doc.add(img_tbl3) - - # now put some motif stuff up there - - - if opts.skip_motif_stuff : - sys.stderr.write('Obediently skipping even more motif stuff\n') - else : - # THEME refines all motifs, display the top 30 - - # for now, just list a table of the top 30 significant, unrefined motifs - doc.add(ReStSection('%s Top 30 Refined Motif Results'%peak_stats['name'],level=3)) - motif_results_fns = glob.glob('%s_motifs_beta*_cv*[0-9].txt'%macs_f.file_info['name']) 
#catRun_mfold10,30_pval1e-5_motifs_beta0.0_cv5.txt - #TODO - do check for file exists - - motif_results_fn = motif_results_fns[0] - - motif_reader = reader(open(motif_results_fn),delimiter='\t') - - motif_header = motif_reader.next() - motif_data = [] - top_n = 30 - motif_fmts = (ident,ident,int,fl_str,fl_str,fl_str,fl_str,fl_str,fl_str) - motif_plot_urls = [] - for rec in motif_reader : - motif_data.append([f(x) for f,x in zip(motif_fmts,rec)]) - """ - if rec[2] in motif_sig_inds_d.keys() : - from_id = motif_sig_inds_d[rec[2]] - try : - old_id_fn = glob.glob(infosite_dir_name+'/images/*_%d_peakmot.png'%from_id)[0] - new_id_fn = old_id_fn.replace('_%d_'%from_id,'_%s_'%rec[2]) - os.rename(old_id_fn,new_id_fn) - except : - sys.stderr.write("Couldn't rename file for pattern %s, just " \ - "assuming its there\n"%(infosite_dir_name+'/images/*_%d_peakmot.png'%from_id)) - """ - new_id_fn = glob.glob(infosite_dir_name+'/images/*_%s_peakmot.png'%rec[2])[0] - motif_plot_urls.append(json_d['stage url']+'/'+new_id_fn) - - doc.add(ReStSimpleTable(['**%s**'%x for x in motif_header],motif_data[:top_n])) - - # create another file with the full table - motif_results_base, motif_results_ext = os.path.splitext(motif_results_fn) - motif_doc_fn = motif_results_base+'.rst' - motif_doc_path = os.path.join(infosite_path,motif_doc_fn) - motif_doc_html_fn = motif_results_base+'.html' - motif_doc_html_path = os.path.join(infosite_path,motif_doc_html_fn) - motif_doc_url = json_d['stage url']+'/'+infosite_dir_name+'/'+motif_doc_html_fn - motif_doc = ReStDocument(motif_doc_path) - motif_doc.add(ReStSection('%s Full Motif Results'%peak_stats['name'])) - motif_doc.add('`Back to main infopage`_') - motif_doc.add(ReStSimpleTable(['**%s**'%x for x in motif_header],motif_data)) - motif_doc.add('`Back to main infopage`_') - motif_doc.add(ReStHyperlink('Back to main infopage',url=reSt_url)) - motif_doc.write() - motif_doc.close() - rst2html_call = 'rst2html.py 
--stylesheet-path=/nfs/antdata/web_stage/css/lsr.css ' \ - '%s %s'%(motif_doc_path,motif_doc_html_path) - sys.stderr.write(rst2html_call+'\n') - r = call(rst2html_call,shell=True) - doc.add('`All refined motifs`_') - doc.add(ReStHyperlink('All refined motifs',url=motif_doc_url)) - - # individual motif plots - plt_tbl = [] - for i,url in enumerate(motif_plot_urls[:30]) : - if i%3 == 0 : - plt_tbl.append([]) - plt_tbl[-1].append(ReStImage(url)) - - doc.add(ReStSimpleTable(('**Peak strength vs refined motif strength**','(based on top 2000 peak sequences by pvalue)',''),plt_tbl)) - - doc.write() - doc.close() - - # 6. convert reSt to PDF and HTML - rst2html_call = 'rst2html.py --stylesheet-path=/nfs/antdata/web_stage/css/lsr.css ' \ - '%s %s'%(reSt_path,reSt_html_path) - sys.stderr.write(rst2html_call+'\n') - r = call(rst2html_call,shell=True) - - pdf_name = exp_name+'_info.pdf' - pdf_path = os.path.join(infosite_path,pdf_name) - r = call('rst2pdf %s -o %s'%(reSt_path,pdf_path),shell=True) - - # 7. write out url to infosite - print json_d['stage url']+'/'+infosite_dir_name+'/'+reSt_html_name - open(infosite_dir_name+'_url.txt','w').write(json_d['stage url']+'/'+infosite_dir_name+'/'+reSt_html_name+'\n')
--- a/chipsequtil-master/scripts/chipseq_pipeline.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,331 +0,0 @@ -#!/usr/bin/env python - -import os -from subprocess import Popen, PIPE -import string -import sys -from optparse import OptionParser, OptionGroup, SUPPRESS_HELP - -from pypeline import Pypeline, ProcessPypeStep as PPS, PythonPypeStep as PyPS, parse_steplist -from chipsequtil import get_file_parts, get_org_settings -from chipsequtil.util import MultiLineHelpFormatter -from TAMO import MotifTools -from TAMO.MD.THEME import parser as theme_parser - -usage = "%prog [options] <organism> <experiment alignment filename> [<control alignment filename>]" -description = """1st generation ChIPSeq analysis pipeline: - - - runs MACS to find peaks and sorts peaks by p-value - - sorts peaks by pvalue and isolates top *n* - - maps peaks to genes - - extracts fasta files for gene peaks in experiments - - constructs background sequences matching foreground distribution - - runs THEME.py on input sequences w/ refinement - - builds an infosite with stats from this analysis - -Control input file is optional. 
*organism* argument is passed to the -*org_settings.py* command to specify organism specific parameters, ensure -that the following commands return valid paths: - -If running MACS: - - org_settings.py <organism> genome_size - - org_settings.py <organism> genome_dir - - org_settings.py <organsim> refgene_anno_path - -If running THEME: - - org_settings.py <organism> theme_hypotheses - - org_settings.py <organism> theme_markov - -""" - -epilog = """Note: it is advised to leave the --*-args arguments unchanged -unless you really know what you're doing.""" - -parser = OptionParser(usage=usage,description=description,epilog=epilog,formatter=MultiLineHelpFormatter()) -parser.add_option('--auto',dest='auto',action='store_true',help='run all steps non-interactively (for batch mode, e.g.)') -parser.add_option('--steplist',dest='steplist',default='',help='with --auto, run specific steps') -parser.add_option('--exp-name',dest='exp_name',default=os.path.basename(os.getcwd()),help='name for the experiment/pipeline, used for convenience [default: current directory name]') -parser.add_option('--bed-args',dest='bed_args',default='--stdout --chromo-strip=.fa',help='double quote wrapped arguments for gerald_to_bed.py [default: %default]') -#parser.add_option('--stats-args',dest='stats_args',default='',help='double quote wrapped arguments for gerald_stats.py [default: %default]') -parser.add_option('--macs-exec',dest='macs_exec',default='macs14',help='the executable to use for MACS, if not an absolute path it needs to be on your shell environment path [default: %default]') -parser.add_option('--macs-args',dest='macs_args',default='--pvalue=1e-5',help='double quote wrapped arguments for macs, only changing --mfold, --tsize, --bw, and --pvalue recommended [default: %default]') -parser.add_option('--map-args',dest='map_args',default='--tss --upstream-window=10000 --downstream-window=10000',help='double quote wrapped arguments for mapping peaks to genes [default: %default]') 
-parser.add_option('--filter-peaks-args',dest='filter_peaks_args',default="--sort-by=pvalue --top=1000 -f 'tags>20'",help='double quote wrapped arguments for filter_macs_peaks.py [default: %default]') -parser.add_option('--filter-neg-peaks-args',dest='filter_neg_peaks_args',default="-f 'tags>20'",help='double quote wrapped arguments for filter_macs_peaks.py applied to negative peaks [default: %default]') -parser.add_option('--peaks-to-fa-args',dest='peaks_to_fa_args',default='--fixed-peak-width=200',help='double quote wrapped arguments for peaks_to_fasta.py [default: %default]') -parser.add_option('--bg-exec',dest='bg_exec',default='rejection_sample_fasta.py',help='the executable to use for generating background sequences for THEME, if not an absolute path it needs to be on your shell environment path [default: %default]') -parser.add_option('--bg-args',dest='bg_args',default='--num-seq=2.1x',help='double quote wrapped arguments for background sequence generation utility [default: %default]') -parser.add_option('--theme-args',dest='theme_args',default='--beta=0.7 --cv=5 --trials=25',help='double quote wrapped arguments for THEME.py [default: %default]') -parser.add_option('--motif-pval-cutoff',dest='motif_pval',type='float',default=1e-5,help='the p-value cutoff for sending non-refined enrichmed motifs to THEME for refinement') -parser.add_option('--parallelize',dest='parallelize',action='store_true',help='parallelize portions of the pipeline using qsub, only works from SGE execution hosts') -parser.add_option('--ucsc',dest='ucsc',action='store_true',default=False,help='perform tasks for automated integration with UCSC genome browser [default:%default]') -parser.add_option('--build-infosite-args',dest='infosite_args',default='',help='arguments to pass to build_chipseq_infosite.py [default: None]') - -ucsc_group = OptionGroup(parser,"UCSC Integration Options (with --ucsc)") -ucsc_group.add_option('--stage-dir',dest='stage_dir',default='./',help='root directory where 
UCSC integration files should be made available [default: %default]') -ucsc_group.add_option('--stage-url',dest='stage_url',default='http://localhost/',help='URL where UCSC integration files will be made available over the web [default: %default]') -parser.add_option_group(ucsc_group) - -#parallel_group = OptionGroup(parser,"Parallelization Options (with --parallelize)",description="These options are relevant to parallelization of the pipeline, functionality is in beta status until further notice") -#parallel_group.add_option('--split-args',dest='split_args',default='--type=count --arg=16',help='double quote wrapped arguments for split_file.py [default: %default]') -#parallel_group.add_option('--qsub-args',dest='qsub_args',default='--die-on-err',help='double quote wrapped arguments for split_qsub.py [default: %default]') -#parser.add_option_group(parallel_group) - -parser.add_option('--print-args',dest='print_args',action='store_true',help=SUPPRESS_HELP) # secret ninja option - - -if __name__ == '__main__' : - - # parse command line arguments - opts, args = parser.parse_args(sys.argv[1:]) - - # stick it up here, so when we print out args it's updated - if opts.ucsc and opts.macs_args.find('--wig') == -1 : - opts.macs_args += " --wig" - - # just print out all options as passed in for script generating purposes - if opts.print_args : - opts_strs = [] - all_opts = [] - all_opts.extend(parser.option_list) - all_opts.extend(*[x.option_list for x in parser.option_groups]) - for opt in all_opts : - opt_str = opt.get_opt_string() - if opt_str in ['--help','--print-args'] : - pass - elif opt_str == '--steplist' and not opts.auto : - pass - #elif opt_str in ['--stage-dir','--stage-url'] and not opts.ucsc : - # pass - #elif opt_str in ['--split-args','--qsub-args'] and not opts.parallelize : - # pass - elif opt.action == 'store' : - arg = str(getattr(opts,opt.dest)) - if arg.count(' ') > 0 or arg.find(' -') != -1 or arg.startswith('-') or arg.find('--') != -1 : - 
opts_strs.append(' %s="%s"'%(opt.get_opt_string(),str(getattr(opts,opt.dest)))) - else : - opts_strs.append(' %s=%s'%(opt.get_opt_string(),str(getattr(opts,opt.dest)))) - elif opt.action == 'store_true' and getattr(opts,opt.dest) : - opts_strs.append(' %s'%opt.get_opt_string()) - opts_strs.append(' $@') - sys.stdout.write(' \\\n'.join(opts_strs)+'\n') - sys.exit(0) - - if len(args) < 2 : - parser.error('Must provide two non-option arguments') - - # filenames and paths - organism, experiment_fn = args[0:2] - control_fn = None - if len(args) > 2 : - control_fn = args[2] - - org_settings = get_org_settings(organism) - refgene_fn = org_settings['refgene_anno_path'] - kg_ref = org_settings['known_gene_anno_path'] - kg_xref = org_settings['known_gene_xref_path'] - - exp_fpath,exp_fname,exp_fbase,exp_fext = get_file_parts(experiment_fn) - exp_wrk_dir = os.path.abspath('.exp_%s_%s'%(exp_fbase,opts.exp_name)) - - if control_fn : - cnt_fpath,cnt_fname,cnt_fbase,cnt_fext = get_file_parts(control_fn) - cnt_wrk_dir = os.path.abspath('.cnt_%s_%s'%(cnt_fbase,opts.exp_name)) - - # the pipeline - #log_fn = os.path.join(opts.exp_name+'_pipeline.log') - pipeline = Pypeline('Analysis pipeline for %s'%opts.exp_name) - - steps = [] - - #if opts.parallelize : - # # split up files - # calls = ["mkdir %s"%exp_wrk_dir, - # "split_file.py %s --outdir=%s %s"%(opts.split_args,exp_wrk_dir,experiment_fn),] - # if control_fn : - # calls.extend(["mkdir %s"%cnt_wrk_dir, - # "split_file.py %s --outdir=%s %s"%(opts.split_args,cnt_wrk_dir,control_fn), - # ]) - # steps.append(PPS('Split files',calls,env=os.environ)) - - ############################################################################ - # run macs - ############################################################################ - cnt_flag = '' - if control_fn : - cnt_flag = '-c %s'%control_fn - - # parse macs_args so we can extract mfold and pvalue...in a rather silly way - macs_mfold = [x for x in opts.macs_args.split(' ') if 'mfold' in x] - 
macs_mfold = macs_mfold[0].split('=',1)[1] if len(macs_mfold) >= 1 else 'DEF' - - macs_pvalue = [x for x in opts.macs_args.split(' ') if 'pvalue' in x] - macs_pvalue = macs_pvalue[0].split('=',1)[1] if len(macs_pvalue) >= 1 else 'DEF' - macs_name = opts.exp_name+'_mfold%s_pval%s'%(macs_mfold,macs_pvalue) - - macs_peaks_fn = macs_name+'_peaks.xls' - macs_neg_peaks_fn = macs_name+'_negative_peaks.xls' - macs_screen_output_fn = macs_name+'_output.txt' - - macs_d = {'exp_fn':experiment_fn, - 'cnt_flag':cnt_flag, - 'name':macs_name, - 'macs_exec':opts.macs_exec, - 'macs_args':opts.macs_args, - 'macs_out':macs_screen_output_fn, - 'gsize':org_settings['genome_size'], - } - calls = ["%(macs_exec)s --gsize=%(gsize)s -t %(exp_fn)s %(cnt_flag)s --name=%(name)s %(macs_args)s 2>&1 | tee %(macs_out)s"%macs_d] - steps.append(PPS('Run MACS',calls,env=os.environ)) - - - ############################################################################ - # process and stage wiggle files - ############################################################################ - if opts.ucsc : - wiggle_dir = macs_name+'_MACS_wiggle' - ucsc_d = {'org':organism, - 'stage_dir':opts.stage_dir, - 'stage_url':opts.stage_url, - 'macs_dir':wiggle_dir, - } - - calls = ["integrate_macs_ucsc.py --auto %(org)s %(stage_dir)s %(stage_url)s %(macs_dir)s"%ucsc_d] - steps.append(PPS("UCSC Integration",calls)) - - - ############################################################################ - # map peaks to genes - ############################################################################ - map_fn = "%s_genes.txt"%macs_name - map_stats_fn = "%s_genes_stats.xls"%macs_name - map_d = {'kg_ref':kg_ref, - 'kg_xref':kg_xref, - 'peaks_fn':macs_peaks_fn, - 'bed_peaks_fn':macs_name+'_peaks.bed', - 'map_fn':map_fn, - 'map_stats_fn':map_stats_fn, - 'map_args':opts.map_args - } - # make sure peak files don't have .fa at the end of their chromosomes - calls = ["sed -i 's/\.fa//g' %(peaks_fn)s %(bed_peaks_fn)s"%map_d] - c = 
"map_peaks_to_known_genes.py %(map_args)s --map-output=%(map_fn)s " + \ - "--detail --stats-output=%(map_stats_fn)s %(kg_ref)s %(kg_xref)s " + \ - "%(peaks_fn)s" - calls.append(c%map_d) - steps.append(PPS('Map peaks to genes',calls,env=os.environ)) - - - ############################################################################ - # filter macs peaks - ############################################################################ - filtered_d = {'filter_peaks_args':opts.filter_peaks_args, - 'filter_neg_peaks_args':opts.filter_neg_peaks_args, - 'peaks_fn':macs_peaks_fn, - 'neg_peaks_fn':macs_neg_peaks_fn - } - c = "filter_macs_peaks.py --print-encoded-fn --encode-filters " \ - "%(filter_peaks_args)s %(peaks_fn)s" - filtered_peaks_fn = Popen(c%filtered_d,shell=True,stdout=PIPE).communicate()[0] - filtered_neg_peaks_fn = macs_name + '_negative_peak_filt.xls' - calls = ["filter_macs_peaks.py --encode-filters %(filter_peaks_args)s %(peaks_fn)s"%filtered_d] - if control_fn is not None : - calls.append("filter_macs_peaks.py --encode-filters %(filter_neg_peaks_args)s %(neg_peaks_fn)s"%filtered_d) - steps.append(PPS('Filter MACS peaks',calls,env=os.environ)) - - - ############################################################################ - # THEME - ############################################################################ - # extract foreground and generate background sequences - fg_fn = filtered_peaks_fn.replace('.xls','.fa') - fg_d = {'opts':opts.peaks_to_fa_args, - 'organism':organism, - 'fg_fn':fg_fn, - 'peaks_fn':filtered_peaks_fn} - calls = ["peaks_to_fasta.py %(opts)s --output=%(fg_fn)s %(organism)s %(peaks_fn)s"%fg_d] - steps.append(PPS('Peaks to Fasta',calls,env=os.environ)) - - bg_fn = "%s_bg.fa"%macs_name - bg_d = {'opts':opts.bg_args, - 'organism':organism, - 'fg_fn':fg_fn, - 'bg_fn':bg_fn} - calls = ["rejection_sample_fasta.py %(opts)s --output=%(bg_fn)s %(organism)s %(fg_fn)s"%bg_d] - steps.append(PPS('Generate Background Sequences',calls,env=os.environ)) 
- - # run THEME on fg - theme_opts, theme_args = theme_parser.parse_args(opts.theme_args.split(' ')) - hyp_fn = org_settings['theme_hypotheses'] - markov_fn = org_settings['theme_markov'] - - # run THEME w/ randomization by running each motif individuall - # this is because TAMO.MD has a memory leak - raw_motif_fn = '%s_motifs_beta%s_cv%s.tamo'%(macs_name,theme_opts.beta,theme_opts.cv) - random_cv_fn = '%s_motifs_beta%s_cv%s_rand.txt'%(macs_name,theme_opts.beta,theme_opts.cv) - - # new old THEME call - #Usage: THEME.sh [options] <FG_FASTA> <BG_FASTA> <HYP_FN> <MARKOV> - # - #Run old THEME version - # - #Options: - # -h, --help show this help message and exit - # --hyp-indices=HYP_INDS - # 0-based indices of hypotheses to run [default: ALL] - # --no-refine do not run with refinement - # --no-parallelize do not use wqsub.py for parallelization - # -v, --verbose print out the commands that are being run - # --dump dump categtories to file - # --output-filename=OUTPUT_FN - # filename to write motif results to [default:dummy.txt] - # --random-output=RANDOM_FN - # filename to write motif results to - # [default:random.txt] - # --motif-file=MOTIF_FN - # filename to write motif results to [default:dummy.out] - # --beta=BETA beta parameter to use [default:0.7] - # --delta=DELTA delta parameter to use [default:0.001] - # --remove-common remove common sequences from analysis - # --randomization run randomization - # --cv=CV number of cross validation folds [default:5] - # --interactive run the script interactively - - motif_fn = '%s_motifs_beta%s_cv%s.txt'%(macs_name,theme_opts.beta,theme_opts.cv) - theme_d = {'opts':opts.theme_args, - 'fg_fn':fg_fn, - 'bg_fn':bg_fn, - 'hyp':hyp_fn, - 'markov':markov_fn, - 'tamo_motif_fn':raw_motif_fn, - 'random_fn':random_cv_fn, - 'motif_fn':motif_fn - } - - theme_call = "THEME.sh %(opts)s %(fg_fn)s %(bg_fn)s %(hyp)s %(markov)s " \ - "--motif-file=%(tamo_motif_fn)s " \ - "--random-output=%(random_fn)s " \ - "--output-filename=%(motif_fn)s " 
\ - "--randomization" - - calls = [theme_call%theme_d] - steps.append(PPS('Run THEME',calls,env=os.environ)) - - # build infosite - calls = ['build_chipseq_infosite.py %s'%opts.infosite_args] - steps.append(PPS('Build infosite',calls,env=os.environ)) - - # cleanup - rm_str = "rm -f %(d)s/*.out %(d)s/*.err %(d)s/*.script %(d)s/*.stats %(d)s/*.bed" - calls = [rm_str%{'d':exp_wrk_dir}] - - if control_fn : - calls.append(rm_str%{'d':cnt_wrk_dir}) - #steps.append(PPS('Clean up',calls,env=os.environ)) - - pipeline.add_steps(steps) - if opts.auto and opts.steplist is not None : - steplist = parse_steplist(opts.steplist,pipeline) - else : - steplist = None - pipeline.run(interactive=not opts.auto,steplist=steplist)
--- a/chipsequtil-master/scripts/chipseq_pipeline_wo_ctrl.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,172 +0,0 @@ -#!/usr/bin/env python - -import os -import sys -from optparse import OptionParser, OptionGroup - -from pypeline import Pypeline, ProcessPypeStep as PPS -from chipsequtil import get_file_parts, get_org_settings -from chipsequtil.util import MultiLineHelpFormatter - -usage = "%prog [options] <organism> <experiment GERALD alignment filename> [<control GERALD alignment filename>]" -description = """1st generation ChIPSeq analysis pipeline: - - - converts Illumina GERALD alignment files to BED format - - calculates statistics on input alignments - - runs MACS to find peaks - - maps peaks to genes - - extracts fasta files for gene peaks in experiments - - constructs background sequences matching foreground distribution - - runs THEME.py on input sequences - - runs THEME.py randomization - - creates documentation on entire pipeline run - -Control input file is optional. 
*organism* argument is passed to the -*org_settings.py* command to specify organism specific parameters, ensure -that the following commands return valid paths: - -If running MACS: - - org_settings.py <organism> genome_size - - org_settings.py <organism> genome_dir - - org_settings.py <organsim> annotation_path - -If running THEME: - - org_settings.py <organism> theme_hypotheses - - org_settings.py <organism> theme_markov - -""" - -epilog = """Note: it is advised to leave the --*-args arguments unchanged -unless you really know what you're doing.""" - -parser = OptionParser(usage=usage,description=description,epilog=epilog,formatter=MultiLineHelpFormatter()) -parser.add_option('--auto',dest='auto',action='store_true',help='run all steps non-interactively (for batch mode, e.g.)') -parser.add_option('--exp-name',dest='exp_name',default=os.path.basename(os.getcwd()),help='name for the experiment/pipeline, used for convenience [default: current directory name]') -parser.add_option('--split-args',dest='split_args',default='--type=count --arg=16',help='double quote wrapped arguments for split_file.py [default: %default]') -parser.add_option('--bed-args',dest='bed_args',default='--stdout --chromo-strip=.fa',help='double quote wrapped arguments for gerald_to_bed.py [default: %default]') -parser.add_option('--stats-args',dest='stats_args',default='',help='double quote wrapped arguments for gerald_stats.py [default: %default]') -parser.add_option('--qsub-args',dest='qsub_args',default='--die-on-err',help='double quote wrapped arguments for split_qsub.py [default: %default]') -parser.add_option('--macs-args',dest='macs_args',default='--mfold=10 --tsize=35 --bw=150 --pvalue=1e-5',help='double quote wrapped arguments for macs, only changing --mfold, --tsize, --bw, and --pvalue recommended [default: %default]') -parser.add_option('--pk-to-fa-args',dest='pk_to_fa_args',default='--bg-type=rej_samp',help='double quote wrapped arguments for peaks_to_fasta.py [default: %default]') 
-parser.add_option('--theme-args',dest='theme_args',default='--beta=0.7 --cv=5',help='double quote wrapped arguments for THEME.py [default: %default]') - - -if __name__ == '__main__' : - - # parse command line arguments - opts, args = parser.parse_args(sys.argv[1:]) - - if len(args) < 3 : - parser.error('Must provide two non-option arguments') - - # filenames and paths - organism, experiment_fn, control_fn = args[0:3] - control_fn = None - if len(args) > 3 : - control_fn = args[2] - - org_settings = get_org_settings(organism) - refseq_fn = org_settings['annotation_path'] - - exp_fpath,exp_fname,exp_fbase,exp_fext = get_file_parts(experiment_fn) - exp_wrk_dir = os.path.abspath('.exp_%s_%s'%(exp_fbase,opts.exp_name)) - - if control_fn : - cnt_fpath,cnt_fname,cnt_fbase,cnt_fext = get_file_parts(control_fn) - cnt_wrk_dir = os.path.abspath('.cnt_%s_%s'%(cnt_fbase,opts.exp_name)) - - # the pipeline - pipeline = Pypeline() - - steps = [] - - # split up files - calls = ["mkdir %s"%exp_wrk_dir, - "split_file.py %s --outdir=%s %s"%(opts.split_args,exp_wrk_dir,experiment_fn),] - if control_fn : - calls.extend(["mkdir %s"%cnt_wrk_dir, - "split_file.py %s --outdir=%s %s"%(opts.split_args,cnt_wrk_dir,control_fn), - ]) - steps.append(PPS('Split files',calls,env=os.environ)) - - # convert to BED format - exp_bed_fn = "%s_exp.bed"%exp_fbase - calls = ["split_qsub.py %s --ext=.bed gerald_to_bed.py --util-args=\"%s\" %s/*.[0-9][0-9][0-9][0-9]"%(opts.qsub_args,opts.bed_args,exp_wrk_dir), - "wait_for_qsub.py", - "cat %s/*.bed > %s"%(exp_wrk_dir,exp_bed_fn), - ] - - if control_fn : - cnt_bed_fn = "%s_cnt.bed"%cnt_fbase - calls.extend(["split_qsub.py %s --ext=.bed gerald_to_bed.py --util-args=\"%s\" %s/*.[0-9][0-9][0-9][0-9]"%(opts.qsub_args,opts.bed_args,cnt_wrk_dir), - "wait_for_qsub.py", - "cat %s/*.bed > %s"%(cnt_wrk_dir,cnt_bed_fn), - ]) - - steps.append(PPS('Convert GERALD to BED format',calls,env=os.environ)) - - #steps.append(PPS('Helloooooooo nurse','echo Helloooooooo nurse')) - 
# generate alignment statistics - exp_stats_fn = '%s_stats.txt'%exp_fbase - calls = ["split_qsub.py %s --ext=.stats gerald_stats.py --util-args=\"%s\" %s/*.[0-9][0-9][0-9][0-9]"%(opts.qsub_args,opts.stats_args,exp_wrk_dir), - "wait_for_qsub.py", - "combine_gerald_stats.py %s/*.stats > %s"%(exp_wrk_dir,exp_stats_fn), - ] - - if control_fn : - cnt_stats_fn = '%s_stats.txt'%cnt_fbase - calls.extend(["split_qsub.py %s --ext=.stats gerald_stats.py --util-args=\"%s\" %s/*.[0-9][0-9][0-9][0-9]"%(opts.qsub_args,opts.stats_args,cnt_wrk_dir), - "wait_for_qsub.py", - "combine_gerald_stats.py %s/*.stats > %s"%(cnt_wrk_dir,cnt_stats_fn), - ]) - steps.append(PPS('Calculate alignment statistics',calls,env=os.environ)) - - # run macs - cnt_flag = '' - if control_fn : - cnt_flag = '-c %s'cnt_bed_fn - - macs_d = {'exp_fn':exp_bed_fn, - 'cnt_flag':cnt_flag, - 'name':opts.exp_name, - 'macs_args':opts.macs_args, - 'gsize':org_settings['genome_size'], - } - calls = ["macs --gsize=%(gsize)s -t %(exp_fn)s %(cnt_flag)s --name=%(name)s --format=BED %(macs_args)s"%macs_d] - steps.append(PPS('Run MACS',calls,env=os.environ)) - - # map peaks to genes - peaks_fn = "%s_peaks.bed"%opts.exp_name - map_fn = "%s_genes.txt"%opts.exp_name - map_stats_fn = "%s_genes_stats.txt"%opts.exp_name - calls = ["map_peaks_to_genes.py --peaks-format=BED %(refGene_fn)s %(peaks_fn)s --map-output=%(map_fn)s --stats-output=%(map_stats_fn)s"%{'refGene_fn':refseq_fn,'peaks_fn':peaks_fn,'map_fn':map_fn,'map_stats_fn':map_stats_fn}] - steps.append(PPS('Map peaks to genes',calls,env=os.environ)) - - # THEME - # extract foreground and generate background sequences - fg_fn = "%s_peaks.fa"%opts.exp_name - bg_fn = "%s_bg.fa"%opts.exp_name - nib_dir = org_settings['genome_dir'] - calls = ["peaks_to_fasta.py %(opts)s --output=%(fg_fn)s --bg-fn=%(bg_fn)s %(organism)s %(peaks_fn)s"%{'opts':opts.pk_to_fa_args,'organism':organism,'fg_fn':fg_fn,'bg_fn':bg_fn,'peaks_fn':peaks_fn}] - steps.append(PPS('Peaks to 
Fasta',calls,env=os.environ)) - - # run THEME on fg - motif_fn = '%s_motifs.txt'%opts.exp_name - hyp_fn = org_settings['theme_hypotheses'] - markov_fn = org_settings['theme_markov'] - calls = ["THEME.py %(opts)s --motif-file=%(motif_fn)s %(fg_fn)s %(bg_fn)s %(hyp)s %(markov)s"%{'opts':opts.theme_args,'motif_fn':motif_fn,'fg_fn':fg_fn,'bg_fn':bg_fn,'hyp':hyp_fn,'markov':markov_fn}] - steps.append(PPS('Run THEME on foreground',calls,env=os.environ)) - - # run THEME randomization - random_motif_fn = '%s_motifs_rand.txt'%opts.exp_name - calls = ["THEME.py %(opts)s --randomization --motif-file=%(motif_fn)s %(fg_fn)s %(bg_fn)s %(hyp)s %(markov)s"%{'opts':opts.theme_args,'motif_fn':random_motif_fn,'fg_fn':fg_fn,'bg_fn':bg_fn,'hyp':hyp_fn,'markov':markov_fn}] - steps.append(PPS('Run THEME randomization',calls,env=os.environ)) - - # cleanup - rm_str = "rm -f %(d)s/*.out %(d)s/*.err %(d)s/*.script %(d)s/*.stats %(d)s/*.bed" - calls = [rm_str%{'d':exp_wrk_dir}, - rm_str%{'d':cnt_wrk_dir}] - steps.append(PPS('Clean up',calls,env=os.environ)) - - pipeline.add_steps(steps) - pipeline.run(interactive=not opts.auto)
--- a/chipsequtil-master/scripts/combine_gerald_stats.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -#!/usr/bin/env python - -import sys, re, os -from optparse import OptionParser -from collections import defaultdict as dd - -parser = OptionParser() - -if __name__ == '__main__' : - - opts, args = parser.parse_args(sys.argv[1:]) - - all_stats = dd(int) - for fn in args : - d = eval(open(fn).read()) - for k,v in d.items() : - all_stats[k] += v - all_stats['tot. aligns'] += v - - keys = all_stats.keys() - keys.sort() - keys.remove('tot. aligns') - - for k in keys : - print k,':',all_stats[k],'(%.4f)'%(float(all_stats[k])/all_stats['tot. aligns']) - - print 'tot. aligns',':',all_stats['tot. aligns']
--- a/chipsequtil-master/scripts/compare_microarray_binding.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,81 +0,0 @@ -#!/usr/bin/env python - -import sys - -from csv import reader, writer -from collections import defaultdict as dd -from optparse import OptionParser -from subprocess import Popen, PIPE - -from chipsequtil import MACSOutput, BEDOutput, AffyBiocFile - -usage = '%prog -m <mapped MACS peaks file>|-b <mapped BED peaks file>|-a <mapped microarray file> [-m <MACS peaks file> ...] [-b <mapped BED peaks file> ...] [-a <mapped microarray file> ...]' -description = """Join all files on the first column, concatenating records with \ -matching entries onto one line per entry. Understands MACS peaks data as mapped \ -with *map_peaks_to_known_genes.py* utility microarray data as mapped by \ -*probeset_to_known_genes.py* utility, passed to program using *-m* and *-a* options \ -respectively. Output is a file where genes with binding data (MACS, BED files) have \ -column with a 1, 0 otherwise, and genes with microarray expression values have logFC \ -and adjusted p-value colums for each microarray file input. Internally, uses \ -*join_mapped_known_genes.py* with --binary-plus option to perform mapping and parses \ -output. 
MACS fields are listed first, followed by BED fields, followed by microarray \ -fields.""" - -epilog="Note: microarray files should have been created by bioconductor, and all files should have a row of fieldnames as the first line" -parser = OptionParser(usage=usage,description=description,epilog=epilog) -parser.add_option('-a','--affy-file',dest='affy_file',action='append',default=[],help='add a mapped microarray file') -parser.add_option('-b','--bed-file',dest='bed_file',action='append',default=[],help='add a mapped BED formatted peaks (*.bed) file') -parser.add_option('-m','--macs-file',dest='macs_file',action='append',default=[],help='add a mapped default MACS formatted peaks (*.xls) file') -parser.add_option('--output',dest='output',default=None,help='file to output joined records to [default: stdout]') - -if __name__ == '__main__' : - - opts,args = parser.parse_args(sys.argv[1:]) - - if len(args) > 0 : - parser.error('There were non-option command line arguments passed, all files should have a preceeding option indicating filetype') - - if len(opts.macs_file) == 0 and len(opts.affy_file) == 0 : - parser.error('No files were passed in, aborting') - - # call join_mapped_known_genes.py - fn_map = {} - fn_map['macs'] = ' '.join(['-m %s'%fn for fn in opts.macs_file]) - fn_map['bed'] = ' '.join(['-b %s'%fn for fn in opts.bed_file]) - fn_map['array'] = ' '.join(['-a %s'%fn for fn in opts.affy_file]) - join_call = 'join_mapped_known_genes.py --binary-plus %(macs)s %(bed)s %(array)s'%fn_map - p = Popen(join_call, shell=True, stdout=PIPE,stderr=PIPE) - stdout, stderr = p.communicate() - if len(stderr) != 0 : - print stderr - - joined_output = stdout.split('\n') - joined_output = joined_output[:-1] if joined_output[-1] == '' else joined_output - - # determine which fields will end up in the file - header = joined_output[0].split('\t') - - # always want gene and symbol - field_indices = [0,1] - - # macs and bed fields are named by filename - for fn in 
opts.macs_file+opts.bed_file : - field_indices.append(header.index(fn)) - - # affy fields are index(fn)+5, index(fn)+8 - for fn in opts.affy_file : - # just add all the microarray columns - fn_header_indices = [i for i,x in enumerate(header) if x.find(fn) != -1] - field_indices.extend(fn_header_indices) - - #field_indices.append(header.index(fn)) - #field_indices.append(header.index(fn)+5) - #field_indices.append(header.index(fn)+8) - - out_f = open(opts.output,'w') if opts.output else sys.stdout - for line in joined_output : - line = line.split('\t') - out_f.write('\t'.join([line[i] for i in field_indices])+'\n') - - if opts.output : - out_f.close()
--- a/chipsequtil-master/scripts/construct_bg_fasta.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,235 +0,0 @@ -#!/usr/bin/env python - -import os -import sys -import warnings - -from collections import defaultdict -from optparse import OptionParser - -from chipsequtil import get_org_settings, RefGeneFile -from chipsequtil.nib import NibDB -from chipsequtil.util import MultiLineHelpFormatter -from TAMO.seq import Fasta - -usage='%prog [options] <type> <organism> <foreground fasta>' -description='Create background sequence databses for motif finding, etc.' -parser = OptionParser(usage=usage,description=description,formatter=MultiLineHelpFormatter()) - - -def rejection_sampling(fg,settings_dict,gc_bins=20) : - - genm_db = NibDB(settings_dict['genome_dir']) - annot = RefGeneFile(settings_dict['annotation_file']) - - - num_peak_bases = 0 - for header, seq in fg.items() : - num_peak_bases += len(seq) - - -if __name__ == '__main__' : - - opts, args = parser.parse_args(sys.argv[1:]) - - if len(args) < 3 : - parser.error('Must provide three non-option arguments') - - sample_type, organism, fg_fn = args[:3] - - settings_dict = get_org_settings(organism) - - fg = Fasta.load(fg_fn) - bg = rejection_sampling(fg,settings_dict) - - -############################################################### -# start Chris' code from rej_samp_bg_rand2.py - the_genes={} #list of distances to nearest TSS - - # for each peak find the chromosome, distance to nearest - # gene, size of peaks in bases, and GC content - the_chrs,dists,sizes,gcs=[],[],[],[] - - # number of bases in the fg sequences - size=0 - - for key in pos_seqs.keys(): - - size+=len(pos_seqs[key]) - - # chromosome first field in fasta headers from bed2seq.bedtoseq - chr=key.split(':')[0] - - # adjust chromosomes in special cases - if re.search('random',chr): - continue - if chr=='chr20': - chr='chrX' - elif chr=='chr21': - chr='chrY' - if not the_genes.has_key(chr): - the_genes[chr]=[] - - # 
start first int in second field of bed2seq.bedtoseq header - start=int(key.split(':')[1].split('-')[0]) - midpoint=int(start+len(pos_seqs[key])/2) - - # figure out which chromosome we're working on - tss_chr=tss[chr.split('chr')[-1]] - - # D is the distances from all the genes, find minimum - D=[(s[0]-midpoint) for s in tss_chr] - - # best distance for this peak - minD=min([abs(x) for x in D]) - best=[d for d in D if abs(d)==minD] - dists.append(best[0]) - - # chromosome for this peak - the_chrs.append(chr) - seq=pos_seqs[key] - - # calculate # bases and GC content - N=len(seq) - sizes.append(N) - gc=len([x for x in seq if (x=='G')or(x=='C')])/N - gcs.append(gc) - - #bin GC content distribution - bins=20 - - # q is # of peaks w/ x% GC content - q=[0]*bins - - for gc in gcs: - for i in range(bins): - win_start=i/bins - win_end=(i+1)/bins - if gc>=win_start and gc<win_end: - q[i]+=1 - continue - - # q is now % peaks w/ x% GC content - q=[x/Nseqs for x in q] - #print q - - # c is # peaks w/ highest GC content - c=max(q)*Nseqs - - # start generating bg sequences - print "Done assembling distance and gc content distributions" - genome_outfile=open(bg,'w') - - # make twice as many - size=round(size/(2*len(pos_seqs))) - bg_gcs,bg_sizes=[],[] - #for key in the_genes.keys(): - #chrom=key.split('chr')[-1] - #the_genes[key]=[x[0] for x in tss[chrom]] - - # C_TX is a list of all genes in (chromosome,gene start) tuples - C_TX=[] - for key in tss.keys(): - chrom=key.split('chr')[-1] - for x in tss[chrom]: - C_TX.append((chrom,x[0])) - - # generate a bg sequence for every fg sequence - for i in range(Nseqs): - - # propose sequences until one is accepted - keep_going=1 - while keep_going: - #random.shuffle(the_chrs) - - # randomize the list of distances from genes - random.shuffle(dists) - #chr=the_chrs[0] - - # pick the first distance, i.e. 
at random - d=dists[0] - - #random.shuffle(the_genes[chr]) - - # randomize the gene list - random.shuffle(C_TX) - - # randomize the peak sizes - random.shuffle(sizes) - - # pick a random gene - (chr,coord)=C_TX[0] - - #coord=the_genes[chr][0] - # propose a starting point for the bg sequence - midpoint=coord-d+random.randint(-100,100) - - # propose a starting size for the bg sequence - size=sizes[0] - start=int(midpoint-int(size/2)) - stop=int(midpoint+int(size/2)) - id='chr'+chr.split('chr')[-1]+':'+str(start)+'-'+str(stop) - r=random.random() - - # randomly choose strand - if r<0.5: strand='+' - else: strand='-' - - # extract the proposed sequence - nib_title,seq=nibfrag.sequence('chr'+chr,start, stop,strand) - if not seq: - print 'NOT FOUND', chr,start,stop, - continue - else: - - N,y=0,0 - # calculate the GC content for the proposed sequence - for line in seq: - s=line.upper() - N+=len(line) - y+=len([x for x in s if (x=='G')or(x=='C')]) - if line[0]=='N': continue - x=float(y)/N - - # determine the GC bin for this sequence - #gc=float(len([x for x in seq if (x=='G')or(x=='C')]))/N - for i in range(bins): - win_start=i/bins - win_end=(i+1)/bins - if x>=win_start and x<win_end: - bin=i - continue - - # pick a uniform random number such that it does not exceed - # the maximum GC content distribution over bins - r=random.random()*c/Nseqs - - # if the random number is <= the GC content for this - # proposed sequence, accept, otherwise reject - if r>q[bin]: - #print 'skip' - continue - else: - #print bin - bg_gcs.append(x) - bg_sizes.append(size) - keep_going-=1 - title='>%s\n'%id - genome_outfile.write(title) - for line in seq: - genome_outfile.write(line.upper()+'\n') - print len(gcs) - print len(bg_gcs) - fg_mean,fg_sdev=mean_sdev(gcs) - print fg_mean,fg_sdev - #bg_mean,bg_sdev=mean_sdev(bg_gcs) - bg_mean=scipy.mean(bg_gcs) - bg_sdev=scipy.std(bg_gcs) - print bg_mean,bg_sdev - fg_size_m,fg_size_dev=mean_sdev(sizes) - bg_size_m,bg_size_dev=mean_sdev(bg_sizes) - 
print fg_size_m,fg_size_dev - print bg_size_m,bg_size_dev - genome_outfile.close() -
--- a/chipsequtil-master/scripts/create_pipeline_script.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,385 +0,0 @@ -#!/usr/bin/env python - -from __future__ import with_statement -import getpass -import json -import os -import textwrap - -try: - import readline - import glob - readline.parse_and_bind("tab: complete") - readline.set_completer_delims('') - - comp_states = {} - def basic_complete_file(text,state) : - #if text.strip() == '' : - # text = './' - options = dict([(i,p) for i,p in enumerate(glob.glob(text+'*'))]) - return options.get(state,None) - - readline.set_completer(basic_complete_file) - -except ImportError: - print "Module readline not available." - -import re -import stat -import sys -from optparse import OptionParser -from subprocess import Popen, PIPE - -import chipsequtil -from chipsequtil import get_global_settings, get_local_settings, check_org_settings, GLOBAL_SETTINGS_FN, LOCAL_SETTINGS_FN -from terminalcontroller import TERM_ESCAPE, announce, warn, error, white, bold - -usage = "%prog" -description = """Script for creating a custom run script for -ChIPSeq/DNAse hypersensitivity experiments. User is asked for -paths and settings required for ChIPSeq analysis using the *chipseq_pipeline.py* -utility and produces an executable run script with helpful information on how to -run it. 
Also creates a JSON formatted file containing all the parameters for -this pipeline run.""" -epilog = "Note: this script only works in Unix-style environments" -parser = OptionParser(usage=usage,description=description,epilog=epilog) - - -script_template = """\ -#!/bin/bash - -# required parameters for the pipeline -ORG=%(organism)s -EXP_FN=%(exp_path)s -CNT_FN=%(cnt_path)s - -# chipseq_pipeline.py is the main workhorse of this analysis -# you may change any of the arguments below from their defaults - -chipseq_pipeline.py $ORG $EXP_FN $CNT_FN \\ -%(def_args)s -""" - -start_text = """\ -This is an interactive script that creates an executable script to use for -ChIPSeq analyses. When prompted for experiment and control files, tab -completion is available a la bash or tcsh shells. Press Ctrl-C at any time to -quit. -""" - -end_text = """The script %(script_fn)s has been created to run this pipeline. \ -The script can now be run with: - -$> ./%(script_fn)s - -Have a nice day.""" - - - -def wb(st) : - sys.stdout.write(white(bold(st))) - - -def input(st,default=None) : - - if default is None : - default_str = '' - else : - default_str = ' [default: ' + default + ' ] ' - - out = None - while out is None : - out = raw_input(white(bold(st))+default_str+white(bold(':'))+' \n') - if len(out) == 0 : - out = default - - return out - - -if __name__ == '__main__' : - - TERM_ESCAPE = True - - try : - - pipeline_args = {} - - # herro - announce('ChIPSeq Experiment Pipeline Script Generator') - print textwrap.fill(start_text) - - opts, args = parser.parse_args(sys.argv[1:]) - if len(args) > 0 : - warn("Arguments were passed, but this script doesn't accept any arguments, rudely ignoring them...\n") - - # this dictionary will be used to generate a JSON formatted file with - # all the relevant settings for the pipeline - json_dict = {} - - ############################################################################ - # name of the experiment - 
############################################################################ - def_path = os.path.basename(os.getcwd()) - exp_name = input('Experiment name',def_path) - exp_name = exp_name.replace(' ','_') # shhhhhhhh... - - json_dict['experiment name'] = exp_name - json_dict['analysis path'] = os.getcwd() - - ############################################################################ - # experiment and control file - ############################################################################ - align_text = "The pipeline can accept either BED, BOWTIE, SAM, or " \ - "ELANDEXPORT formatted alignment files. SAM is the default " \ - "format of files provided by the BMC pipeline. Both experiment " \ - "and control files must have the same format." - print textwrap.fill(align_text) - - align_fmt = input("Which format are the alignment files in?",'SAM') - exp_path = input('Experiment alignment path') - exp_path = exp_path.strip() - - lims_exp_url = input('Experiment LIMS sample URL, if applicable','none') - lims_exp_url = lims_exp_url.strip() - - cntrl_path = input('Control alignment path (leave blank for no control)','none') - cntrl_path = cntrl_path.strip() - - lims_cntrl_url = input('Control LIMS sample URL, if applicable','none') - lims_cntrl_url = lims_cntrl_url.strip() - - if cntrl_path == 'none' : - cntrl_path = '' - - if cntrl_path == '' : - print 'Analysis will be run with no control' - - json_dict['experiment path'] = os.path.realpath(exp_path) - json_dict['experiment lims url'] = lims_exp_url - json_dict['control path'] = os.path.realpath(cntrl_path) if cntrl_path != '' else 'none' - json_dict['control lims url'] = lims_cntrl_url - - ############################################################################ - # organism + settings - ############################################################################ - announce('Organism settings configuration') - global_settings = get_global_settings() - local_settings = get_local_settings() - valid_org_settings = 
global_settings.keys() + local_settings.keys() - valid_org_settings.sort() - - org_text = """\ -Below are the organism settings available on this system. The pipeline will -use the settings for one organism (e.g. %(org)s) for the entire execution. If -you do not see a set of settings that correspond to files you need you may -add your own to %(local_org)s. See %(glob_org)s for details. -""" - - print textwrap.fill(org_text%{'org':valid_org_settings[0],'local_org':LOCAL_SETTINGS_FN,'glob_org':GLOBAL_SETTINGS_FN},break_long_words=False) - print - - wb('Available settings\n') - # global settings - print 'Global settings: (%s)'%GLOBAL_SETTINGS_FN - org_sets = [(k,global_settings[k]) for k in sorted(global_settings.keys())] - for org, settings in org_sets : - wb(org.ljust(8)) - print ':', settings.get('description','No description') - #for k,v in settings.items() : - # print ' '*4+k+": "+str(v) - - # local settings - print 'Local settings: (%s)'%LOCAL_SETTINGS_FN - org_sets = [(k,local_settings[k]) for k in sorted(local_settings.keys())] - for org, settings in org_sets : - wb(org.ljust(8)) - print ':', settings.get('description','No description') - #for k,v in settings.items() : - # print ' '*4+k+": "+str(v) - org = '' - all_settings = {} - all_settings.update(global_settings) - all_settings.update(local_settings) - - while org not in valid_org_settings : - org = input('Choose organism configuration, one of ('+','.join(valid_org_settings)+')') - - # check for the required settings - required_settings = ['description','genome_dir','refgene_anno_path','theme_hypotheses','theme_markov'] - if not check_org_settings(org,required_settings) : - warn(textwrap.fill('Selected organism settings must have the following settings defined:\n\ - %s\n\ - Either select another organism or define these settings in your local\ - configuration file.'%required_settings)) - org = '' - print - - json_dict['org'] = org - - 
############################################################################ - # UCSC - ############################################################################ - - ucsc_text = """The pipeline can include a step to automatically make called -peak data available on the web for integration with UCSC genome browser.""" - - print textwrap.fill(ucsc_text,break_long_words=False) - - ucsc_integrate = input('Would you like to integrate this analysis with UCSC genome browser [y/n]?','y') - ucsc_integrate = False if ucsc_integrate == 'n' else True - ucsc_args = '' - stage_dir = '/nfs/antdata/web_stage/%s'%getpass.getuser() - stage_url = 'http://fraenkel.mit.edu/stage/%s'%getpass.getuser() - if ucsc_integrate : - ucsc_args = ['--ucsc'] - ucsc_args = ' '.join(ucsc_args) - - pipeline_args['--stage-dir'] = stage_dir - pipeline_args['--stage-url'] = stage_url - - json_dict['stage dir'] = stage_dir - json_dict['stage url'] = stage_url - - # TODO - consider letting user set these on script creation time - # any utility specific arguments? 
- # - MACS - # - THEME - - - ############################################################################ - # various pipeline parameters - ############################################################################ - - # --macs-args - macs_args = ['--mfold=10,30','--format=%s'%align_fmt] - pval = '' - while not re.search('^\de-\d+$',pval) : - pval = input('What p-value should MACS use as a cutoff?','1e-5') - macs_args.append('--pvalue=%s'%pval) - pipeline_args['--macs-args'] = ' '.join(macs_args) - - # --map-args - map_args = [] - tss = '' - while tss.upper() not in ('TSS','GENE') : - tss = input('Should gene mapping be made in relation to transcription start site or full gene coordinates [TSS/gene]?','TSS') - if tss == 'TSS' : - map_args.append('--tss') - - window = '' - while not re.search('^\d+,\d+$',window) : - window = input('What window would you like to use for mapping peaks to genes (upstream bases,downstream bases)?','10000,10000') - upstr, downstr = window.split(',') - map_args.extend(['--upstream-window=%s'%upstr,'--downstream-window=%s'%downstr]) - pipeline_args['--map-args'] = ' '.join(map_args) - - # --filter-peaks-args - filt_args = ['--sort-by=pvalue'] - fdr = '' - while not re.search('^\d+(\.\d+)?',fdr) and fdr != 'none' : - fdr = input('What FDR cutoff should be used, in %?','none') - if fdr != 'none' : - filt_args.append("--filter='fdr<%s'"%fdr) - - top = '' - while not re.search('^\d+$',top) and top != 'ALL' : - top = input('How many peak sequences should be used for motif discovery when sorted by p-value [<# peaks>/ALL]','1000') - if top != 'ALL' : - filt_args.append('--top=%s'%top) - - # tag filter for both pos and neg peaks - tags = '' - filt_neg_args = [] - while not re.search('^\d+$',tags) and tags != 'ALL' : - tags = input('What tag count cutoff should be used as a minimum for positive and negative peaks? 
[<# peaks>/None]','20') - if tags != 'None' : - filt_args.append("--filter='tags>%s'"%tags) - filt_neg_args.append("--filter='tags>%s'"%tags) - pipeline_args['--filter-peaks-args'] = ' '.join(filt_args) - pipeline_args['--filter-neg-peaks-args'] = ' '.join(filt_neg_args) - - # --peaks-to-fa-args - peaks_to_fa_args = [] - width = '' - while not re.search('^\d+$',width) and width != 'NA' : - width = input('What width around peak summit should be used for motif analysis (NA to use entire peak)? [<# bases>/NA]','200') - if width != 'NA' : - peaks_to_fa_args.append('--fixed-peak-width=%s'%width) - else : - width = 'none' - pipeline_args['--peaks-to-fa-args'] = ' '.join(peaks_to_fa_args) - - # --parallelize - parallel = input('Use cluster parallelization [y/n]?','y') - parallel = '--parallelize' if parallel.lower() != 'n' else '' - - # each user-specified argument gets its own key - json_dict['format'] = align_fmt - json_dict['mapping type'] = tss - json_dict['mapping window'] = (upstr,downstr) - json_dict['FDR filter'] = fdr - json_dict['peaks used by THEME'] = top - json_dict['fixed peak width'] = width - json_dict['parallelize'] = parallel != '' - json_dict['peak tag count filter'] = tags - - # put all the command line utility args in json_dict as its own dict - json_dict['pipeline args'] = pipeline_args - - ############################################################################ - # done with input, creating script and other stuff - ############################################################################ - # if the experiment and control files are in a different directory, - # create symlinks for them - exp_dir,exp_fn = os.path.split(os.path.abspath(exp_path)) - if exp_dir != os.getcwd() : - wb('Creating symlink for experiment file...\n') - if os.path.exists(exp_fn) : - if os.path.realpath(exp_fn) != os.path.abspath(exp_path) : # existing symlink doesn't point to the same file, prompt to overwrite - ans = raw_input('Symlink %s in current directory points to 
%s but you asked for %s, overwrite symbolic link? y/[n] '%(exp_fn,os.path.realpath(exp_fn),os.path.abspath(exp_path))) - if ans == 'y' : - os.remove(exp_fn) - exp_fn = 'exp_'+exp_fn - os.symlink(exp_path,exp_fn) - else : - exp_fn = 'exp_'+exp_fn - os.symlink(exp_path,exp_fn) - - if cntrl_path != '' : - cntrl_dir,cntrl_fn = os.path.split(os.path.abspath(cntrl_path)) - if cntrl_dir != os.getcwd() : - wb('Creating symlink for control file...\n') - if os.path.exists(cntrl_fn) : - if os.path.realpath(cntrl_fn) != os.path.abspath(cntrl_path) : # existing symlink doesn't point to the same file, prompt to overwrite - ans = raw_input('Symlink %s in current directory points to %s but you asked for %s, overwrite symbolic link? y/[n] '%(cntrl_fn,os.path.realpath(cntrl_fn),os.path.abspath(cntrl_path))) - if ans == 'y' : - os.remove(cntrl_fn) - cntrl_fn = 'cntrl_'+cntrl_fn - os.symlink(cntrl_path,cntrl_fn) - else : - cntrl_fn = 'cntrl_'+cntrl_fn - os.symlink(cntrl_path,cntrl_fn) - else : - cntrl_fn = '' - - # get default chipseq_pipeline.py args - pipeline_args = ' '.join(['%s="%s"'%(k,v) for k,v in pipeline_args.items()]) - print 'chipseq_pipeline.py --exp-name=%s %s %s --print-args'%(exp_name,ucsc_args,pipeline_args) - def_args = Popen('chipseq_pipeline.py --exp-name=%s %s %s %s --print-args'%(exp_name,ucsc_args,parallel,pipeline_args),shell=True,stdout=PIPE,stderr=PIPE).communicate()[0] - - wb('Creating script...\n') - script_fn = '%s_pipeline.sh'%exp_name - with open(script_fn,'w') as script_f : - script_f.write(script_template%{'exp_path':exp_fn,'cnt_path':cntrl_fn,'organism':org,'exp_name':exp_name,'def_args':def_args}) - os.chmod(script_f.name,stat.S_IRWXU|stat.S_IRWXG|stat.S_IROTH) - - print end_text%{'script_fn':script_fn} - - wb('Creating parameter file...\n') - json_fn = '%s_params.json'%exp_name - with open(json_fn,'w') as json_f : - json.dump(json_dict,json_f,indent=4) - - except KeyboardInterrupt : - sys.stderr.write('\n') - error('Script creation interrupted, 
aborting')
--- a/chipsequtil-master/scripts/extract_promoters.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,79 +0,0 @@ -#!/usr/bin/env python - -import re -import sys -from csv import writer -from optparse import OptionParser - -from collections import defaultdict - -from chipsequtil import get_org_settings, RefGeneFile -from chipsequtil.nib import NibDB -from chipsequtil.util import MultiLineHelpFormatter as MF - -usage = "%prog [options] <organism>" -description = """Extract the promoter sequences in FASTA format from all genes -or a list of genes specified in an input file. Gene annotation is RefGene -corresponding to the organism passed in, paths returned by: - -$> org_settings.py <organism> refgene_anno_path -$> org_settings.py <organism> genome_dir - -must be valid.""" -parser = OptionParser(usage=usage,description=description,formatter=MF()) -parser.add_option('-u','--upstream',type='int',default=3000,help='upstream window from TSS to extract [default: %default]') -parser.add_option('-d','--downstream',type='int',default=1000,help='downstream window from TSS to extract [default: %default]') -parser.add_option('-l','--gene-list',dest='gene_list',default=None, - help='file containing a list of gene identifiers to extract, one per line [default: %default]') -gene_type_choices = ['symbol','refgene'] -parser.add_option('-t','--gene-type',dest='gene_type',type='choice', - choices=gene_type_choices,default=gene_type_choices[0], - help='type of gene identifier in gene list, choose from %s [default: %%default]'%gene_type_choices) -parser.add_option('-o','--output',dest='output',default=None, - help='file to write fasta records to [default: stdout]') - -if __name__ == '__main__' : - - opts, args = parser.parse_args(sys.argv[1:]) - - if len(args) != 1 : - parser.error('Exactly one argument is required') - - org_settings = get_org_settings(args[0]) - - refgene_fn = org_settings['refgene_anno_path'] - refgene_f = RefGeneFile(refgene_fn) - - 
nib_db = NibDB(nib_dirs=[org_settings['genome_dir']]) - - gene_list = None - if opts.gene_list : - gene_list = [x.strip() for x in open(opts.gene_list).readlines()] - - id_index = 'bin' - if opts.gene_type != gene_type_choices[0] : - if opts.gene_type == 'refgene' : - id_index = 'name' - - seq_recs = [] - gene_map = defaultdict(list) - for rec in refgene_f : - if gene_list and rec[id_index] not in gene_list : continue # skip this one - st, end = max(0,int(rec['txStart'])-opts.upstream), min(int(rec['txStart'])+opts.downstream,nib_db.db_info[rec['chrom']]['nbases']) - key = (rec['chrom'],st,end,rec['strand']) - seq_recs.append(key) - gene_map[key[:-1]].append(rec['bin']+'/'+rec['name']) - - fasta_recs = nib_db.get_fasta_batch(seq_recs) - - out_f = open(opts.output,'w') if opts.output else sys.stdout - header_regex = re.compile('^.*(chr[0-9MXY]+).*:([0-9]+)-([0-9]+).*$') - for header, seq in zip(*fasta_recs) : - # map sequences back to gene names using the header - reg_obj = header_regex.search(header) - if reg_obj is not None : - chrm,st,end = reg_obj.groups() - gene_names = gene_map.get((chrm,int(st),int(end))) - if gene_names is not None : - header = header.strip()+':'+','.join(gene_names)+'\n' - out_f.write(header+seq+'\n')
--- a/chipsequtil-master/scripts/filter_bed_by_position_count.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -#!/usr/bin/env python - -import sys - -from csv import reader, writer -from optparse import OptionParser - -usage = '%prog [options] <bed file>' -description = """Analyze BED file and filter out alignments above some threshold \ -that align to a single genomic position.""" -epilog="Note: only works if BED file is sorted!" -parser = OptionParser(usage=usage,description=description,epilog=epilog) -parser.add_option('-n','--max-count',dest='max_count',default=5,type='int',help='max tag count at a given position, filter above [default: %default]') -parser.add_option('--output',dest='output',default=None,help='write output to file') - -if __name__ == '__main__' : - - opts, args = parser.parse_args(sys.argv[1:]) - - if len(args) != 1 : - parser.error('Exactly one sorted .bed file is required') - - bed_fn = args[0] - - bed_reader = reader(open(bed_fn),delimiter='\t') - out_f = open(opts.output,'w') if opts.output else sys.stdout - bed_writer = writer(out_f,delimiter='\t') - - curr_key, curr_key_count = None, 0 - for rec in bed_reader : - key = rec[:3] # chromosome, start, end - if key != curr_key : - curr_key, curr_key_count = key, 0 - if curr_key_count < opts.max_count : - bed_writer.writerow(rec) - curr_key_count += 1 - else : - continue - if opts.output : out_f.close()
--- a/chipsequtil-master/scripts/filter_gps_peaks.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,215 +0,0 @@ -#!/usr/bin/env python - -import re -import os -import sys -from collections import defaultdict -from optparse import OptionParser, SUPPRESS_HELP -from random import shuffle - -from chipsequtil import GPSFile, get_file_parts -from chipsequtil.util import MultiLineHelpFormatter as MF -from terminalcontroller import warn - -usage = "%prog [options] <GPS peak file>" -description = """\ -Filter GPS peaks by supplied criteria. Available filter features are: - -IP -Control -Fold -qvalue -pvalue -IPvsEMP -IPvsCTR - -Filters are provided as expressions using the [-f |--filter] option, e.g. the command - -%prog -f "IP>100" --filter="pvalue<=1e-9" <GPS peak file> - -finds only peaks with more than 100 tags and a pvalue of less than 1e9. Any -number of filters may be provided, and only peaks that match *all* filters pass. \ -User is warned if filters result in zero results. Only inequality operators are \ -valid. Invoking with no filter arguments returns all peaks. To sort, use the \ ---sort-by option, e.g. - -%prog -f "pvalue<=1e-9" --sort-by=pvalue <GPS peak file> - -sorts peaks with a pvalue smaller than 1e-9 with the smallest pvalue peaks. All fields \ -are sorted ascending by default. Output is prepended with comments describing what \ -the file contains, i.e. which filters are applied, how many records there are, etc. 
- -Note: GPS P_-log10 and Q_-log10 values are converted to normal pvalues and qvalues -""" - -parser = OptionParser(usage=usage,description=description,formatter=MF()) -parser.add_option('-f','--filter',dest='filters',default=[],action='append',help='add filter expression') -parser.add_option('--sort-by',dest='sort_by',default='',help='comma delimited list of features to sort by, filtered peaks are not sorted by default, if provided peaks are sorted ascending by default') -parser.add_option('--sort-dir',dest='sort_dir',type='choice',choices=['ASCEND','DESCEND'],default='ASCEND',help='direction to sort [default: %default]') -parser.add_option('--top',dest='top',type='int',default=None,help='accepts an integer, output at most this many peaks [default: all]') -parser.add_option('--output',dest='output',default=None,help='filename to output filtered peaks to [default: stdout]') -parser.add_option('--encode-filters',dest='encode_filters',action='store_true',help='write out records to a file <GPS peaks file>_<filters>.xls (incompatible with --output option)') -parser.add_option('--summary',dest='summary',action='store_true',help='only print out summary information for the filter') -parser.add_option('--no-header',dest='no_header',action='store_true',help='do not print out header or metadata info') -parser.add_option('--shuffle',dest='shuffle',action='store_true',help='shuffle order of filtered records, useful for selecting random peaks') - -parser.add_option('--print-encoded-fn',dest='print_encoded_fn',action='store_true',help="print out the filename that would be created by --encode-filters") - -# make condition function objects using closures -_lt = lambda x,y : x < y -_lte = lambda x,y : x <= y -_gt = lambda x,y : x > y -_gte = lambda x,y : x >= y -_cond_map = {'<':_lt,'<=':_lte,'>':_gt,'>=':_gte,None:None} - -def make_condition(low_val=None,low_test=_gte,high_val=None,high_test=_lte) : - if low_val and not high_val : - return lambda x: low_test(low_val,x) - elif not 
low_val and high_val : - return lambda x: high_test(x,high_val) - elif low_val and high_val : - return lambda x: low_test(low_val,x) and high_test(x,high_val) - else : - return lambda x: True # identity with no constraints - -# regex and function for parsing filter strings -numeric_regex_str = r'\d+(?:\.\d*)?(?:(?:e|E)-?\d+)?' # matches numeric-looking patterns, e.g. 1, 1.234, 1e4, 1.234E-5, etc. -separator_regex_str = r'(?:>|>=|<|<=)' -ids_regex_str = r'(?:IP|Control|Fold|qvalue|pvalue|IPvsEMP|IPvsCTR)' -filter_regex = re.compile('^(%(num)s)?(%(sep)s)?(%(id)s)(%(sep)s)?(%(num)s)?$'%{'num':numeric_regex_str,'sep':separator_regex_str,'id':ids_regex_str}) - -class FilterException(Exception) : pass - -def parse_filter(filter_str) : - match = filter_regex.search(filter_str.strip()) - if match is None : - raise FilterException('Filter %s is formatted incorrectly'%filter_str) - low_val, low_test, field, high_test, high_val = match.groups() - low_val = float(low_val) if low_val else low_val - high_val = float(high_val) if high_val else high_val - return field, make_condition(low_val,_cond_map[low_test],high_val,_cond_map[high_test]) - -_sort_keys = {'length': lambda x: int(x[3]), - 'tags': lambda x: int(x[5]), - 'pvalue': lambda x: 10**(float(x[6])/-10), - 'fold_enrichment': lambda x: float(x[7]), - 'fdr': lambda x: float(x[8]), - } - - -summary_str = """\ -# This output was generated by filter_macs_peaks.py, filtered from %(macs_fn)s -# Number of peaks: %(num_recs)d -# Filters: %(filters)s -# Sorted by: %(sort_by)s -# Shuffled: %(shuffled)s -""" -if __name__ == '__main__' : - - opts, args = parser.parse_args(sys.argv[1:]) - - if len(args) < 1 : - parser.error('Must provide one GPS peaks file') - - if opts.output is not None and opts.encode_filters : - parser.error('--output and --encode-filters options are mutually exclusive') - - # set where to write output - if opts.encode_filters : - # construct filename additions - fn_str = '' - opts.filters.sort() - for filt in 
opts.filters : - filter_str = filt.replace(' ','') - filter_str = filter_str.replace('>=','_GTE_') - filter_str = filter_str.replace('<=','_LTE_') - filter_str = filter_str.replace('>','_GT_') - filter_str = filter_str.replace('<','_LT_') - fn_str += '_%s'%filter_str - - if opts.top is not None : - fn_str += '_top%d'%opts.top - - if len(opts.sort_by) != 0 : - fn_str += '_sortby_%s'%opts.sort_by - - if opts.shuffle : - fn_str += '_shuffled' - - macs_path,macs_fn,macs_basefn,macs_ext = get_file_parts(args[0]) - encoded_fn = os.path.join(macs_path,macs_basefn+fn_str+macs_ext) - if opts.print_encoded_fn : - sys.stdout.write(encoded_fn) - sys.exit(0) - else : - out_f = open(encoded_fn,'w') - elif opts.output : - out_f = open(opts.output,'w') - else : - out_f = sys.stdout - - # parse the filters - field_filters = defaultdict(list) - for filter in opts.filters : - field, filter_cond = parse_filter(filter) - field_filters[field].append(filter_cond) - - # start processing GPS file - peaks = GPSFile(args[0]) - - # filter the records - pass_recs = [] - for peak in peaks : - # test each of the fields, if any one fails skip the record - if not all([c(int(peak['IP'])) for c in field_filters['IP']]) or \ - not all([c(int(peak['Control'])) for c in field_filters['Control']]) or \ - not all([c(float(peak['Fold'])) for c in field_filters['Fold']]) or \ - not all([c(10**(float(peak['Q_-lg10'])/-10)) for c in field_filters['qvalue']]) or \ - not all([c(10**(float(peak['P_-lg10'])/-10)) for c in field_filters['pvalue']]) or \ - not all([c(float(peak['IPvsEMP'])) for c in field_filters['IPvsEMP']]) or \ - not all([c(float(peak['IPvsCTR'])) for c in field_filters['IPvsCTR']]) : - continue - else : - pass_recs.append([peak[k] for k in GPSFile.FIELD_NAMES]) - - if len(pass_recs) == 0 : - warn('WARNING: no records remain after filtering\n') - sys.exit(1) - - # sorting - if opts.sort_by : - pass_recs.sort(key=_sort_keys[opts.sort_by],reverse=opts.sort_dir != 'ASCEND') - - # top records - 
num_recs = len(pass_recs) if not opts.top else min(len(pass_recs),opts.top) - - # construct the summary string - filters_str = 'none' if len(opts.filters) == 0 else ', '.join(opts.filters) - sort_str = 'original order' if not opts.sort_by else opts.sort_by+', '+opts.sort_dir - shuffled_str = str(opts.shuffle) - summary = summary_str%{'macs_fn':args[0],'num_recs':num_recs, - 'filters':filters_str, - 'sort_by':sort_str, - 'shuffled':shuffled_str} - - # print summary only - if opts.summary : - sys.stdout.write(summary) - sys.exit(0) - - # write out the header cuz it's a nice thing to do - if not opts.no_header : - out_f.write(summary) - out_f.write('\t'.join(GPSFile.FIELD_NAMES)+'\n') - - # write out records - if opts.shuffle : - shuffle(pass_recs) - out_recs = pass_recs[:num_recs] - - for rec in out_recs : - # rec[0] is a tuple of (chromosome,start pos,original string) - out_f.write('\t'.join([rec[0][2]]+map(str,rec[1:]))+'\n') - - # good programming practice - out_f.close()
--- a/chipsequtil-master/scripts/filter_macs_peaks.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,210 +0,0 @@ -#!/usr/bin/env python - -import re -import os -import sys -from collections import defaultdict -from optparse import OptionParser, SUPPRESS_HELP -from random import shuffle - -from chipsequtil import MACSFile, MACSOutput, get_file_parts -from chipsequtil.util import MultiLineHelpFormatter as MF -from terminalcontroller import warn - -usage = "%prog [options] <MACS peak file>" -description = """\ -Filter MACS peaks by supplied criteria. Available filter features are: - -length -tags -pvalue -fold_enrichment -fdr - -Filters are provided as expressions using the [-f |--filter] option, e.g. the command - -%prog -f "tags>100" --filter="pvalue<=1e-9" --filter="100<length<=200" <MACS peak file> - -finds only peaks with more than 100 tags, a pvalue of less than 1e9, and a length \ -between 100, exclusive, and 200, inclusive. Any number of filters may be provided, \ -and only peaks that match *all* filters pass. User is warned if filters result in \ -zero results. Only inequality operators are valid. Invoking with no filter arguments \ -returns all peaks. To sort, use the --sort-by option, e.g. - -%prog -f "pvalue<=1e-9" --sort-by=pvalue <MACS peak file> - -sorts peaks with a pvalue smaller than 1e-9 with the smallest pvalue peaks. All fields \ -are sorted ascending by default. Output is prepended with comments describing what \ -the file contains, i.e. which filters are applied, how many records there are, etc. 
- -Note: MACS -10*log10(pvalue) values are converted to normal pvalues -""" - -parser = OptionParser(usage=usage,description=description,formatter=MF()) -parser.add_option('-f','--filter',dest='filters',default=[],action='append',help='add filter expression') -parser.add_option('--sort-by',dest='sort_by',default='',help='comma delimited list of features to sort by, filtered peaks are not sorted by default, if provided peaks are sorted ascending by default') -parser.add_option('--sort-dir',dest='sort_dir',type='choice',choices=['ASCEND','DESCEND'],default='ASCEND',help='direction to sort [default: %default]') -parser.add_option('--top',dest='top',type='int',default=None,help='accepts an integer, output at most this many peaks [default: all]') -parser.add_option('--output',dest='output',default=None,help='filename to output filtered peaks to [default: stdout]') -parser.add_option('--encode-filters',dest='encode_filters',action='store_true',help='write out records to a file <MACS peaks file>_<filters>.xls (incompatible with --output option)') -parser.add_option('--summary',dest='summary',action='store_true',help='only print out summary information for the filter') -parser.add_option('--no-header',dest='no_header',action='store_true',help='do not print out header or metadata info') -parser.add_option('--shuffle',dest='shuffle',action='store_true',help='shuffle order of filtered records, useful for selecting random peaks') - -parser.add_option('--print-encoded-fn',dest='print_encoded_fn',action='store_true',help="print out the filename that would be created by --encode-filters") - -# make condition function objects using closures -_lt = lambda x,y : x < y -_lte = lambda x,y : x <= y -_gt = lambda x,y : x > y -_gte = lambda x,y : x >= y -_cond_map = {'<':_lt,'<=':_lte,'>':_gt,'>=':_gte,None:None} - -def make_condition(low_val=None,low_test=_gte,high_val=None,high_test=_lte) : - if low_val and not high_val : - return lambda x: low_test(low_val,x) - elif not low_val and 
high_val : - return lambda x: high_test(x,high_val) - elif low_val and high_val : - return lambda x: low_test(low_val,x) and high_test(x,high_val) - else : - return lambda x: True # identity with no constraints - -# regex and function for parsing filter strings -numeric_regex_str = r'\d+(?:\.\d*)?(?:(?:e|E)-?\d+)?' # matches numeric-looking patterns, e.g. 1, 1.234, 1e4, 1.234E-5, etc. -separator_regex_str = r'(?:>|>=|<|<=)' -ids_regex_str = r'(?:tags|pvalue|fold_enrichment|fdr|length)' -filter_regex = re.compile('^(%(num)s)?(%(sep)s)?(%(id)s)(%(sep)s)?(%(num)s)?$'%{'num':numeric_regex_str,'sep':separator_regex_str,'id':ids_regex_str}) - -class FilterException(Exception) : pass - -def parse_filter(filter_str) : - match = filter_regex.search(filter_str.strip()) - if match is None : - raise FilterException('Filter %s is formatted incorrectly'%filter_str) - low_val, low_test, field, high_test, high_val = match.groups() - low_val = float(low_val) if low_val else low_val - high_val = float(high_val) if high_val else high_val - return field, make_condition(low_val,_cond_map[low_test],high_val,_cond_map[high_test]) - -_sort_keys = {'length': lambda x: int(x[3]), - 'tags': lambda x: int(x[5]), - 'pvalue': lambda x: 10**(float(x[6])/-10), - 'fold_enrichment': lambda x: float(x[7]), - 'fdr': lambda x: float(x[8]), - } - - -summary_str = """\ -# This output was generated by filter_macs_peaks.py, filtered from %(macs_fn)s -# Number of peaks: %(num_recs)d -# Filters: %(filters)s -# Sorted by: %(sort_by)s -# Shuffled: %(shuffled)s -""" -if __name__ == '__main__' : - - opts, args = parser.parse_args(sys.argv[1:]) - - if len(args) < 1 : - parser.error('Must provide one MACS peaks file') - - if opts.output is not None and opts.encode_filters : - parser.error('--output and --encode-filters options are mutually exclusive') - - # set where to write output - if opts.encode_filters : - # construct filename additions - fn_str = '' - opts.filters.sort() - for filt in opts.filters : - 
filter_str = filt.replace(' ','') - filter_str = filter_str.replace('>=','_GTE_') - filter_str = filter_str.replace('<=','_LTE_') - filter_str = filter_str.replace('>','_GT_') - filter_str = filter_str.replace('<','_LT_') - fn_str += '_%s'%filter_str - - if opts.top is not None : - fn_str += '_top%d'%opts.top - - if len(opts.sort_by) != 0 : - fn_str += '_sortby_%s'%opts.sort_by - - if opts.shuffle : - fn_str += '_shuffled' - - macs_path,macs_fn,macs_basefn,macs_ext = get_file_parts(args[0]) - encoded_fn = os.path.join(macs_path,macs_basefn+fn_str+macs_ext) - if opts.print_encoded_fn : - sys.stdout.write(encoded_fn) - sys.exit(0) - else : - out_f = open(encoded_fn,'w') - elif opts.output : - out_f = open(opts.output,'w') - else : - out_f = sys.stdout - - # parse the filters - field_filters = defaultdict(list) - for filter in opts.filters : - field, filter_cond = parse_filter(filter) - field_filters[field].append(filter_cond) - - # start processing MACS file - peaks = MACSFile(args[0]) - - # filter the records - pass_recs = [] - for peak in peaks : - # test each of the fields, if any one fails skip the record - if not all([c(int(peak['length'])) for c in field_filters['length']]) or \ - not all([c(int(peak['tags'])) for c in field_filters['tags']]) or \ - not all([c(10**(float(peak['-10*log10(pvalue)'])/-10)) for c in field_filters['pvalue']]) or \ - not all([c(float(peak['fold_enrichment'])) for c in field_filters['fold_enrichment']]) or \ - not all([c(float(peak['FDR(%)'])) for c in field_filters['fdr']]) : - continue - else : - pass_recs.append([peak[k] for k in MACSOutput.FIELD_NAMES]) - - if len(pass_recs) == 0 : - warn('WARNING: no records remain after filtering\n') - sys.exit(1) - - # sorting - if opts.sort_by : - pass_recs.sort(key=_sort_keys[opts.sort_by],reverse=opts.sort_dir != 'ASCEND') - - # top records - num_recs = len(pass_recs) if not opts.top else min(len(pass_recs),opts.top) - - # construct the summary string - filters_str = 'none' if 
len(opts.filters) == 0 else ', '.join(opts.filters) - sort_str = 'original order' if not opts.sort_by else opts.sort_by+', '+opts.sort_dir - shuffled_str = str(opts.shuffle) - summary = summary_str%{'macs_fn':args[0],'num_recs':num_recs, - 'filters':filters_str, - 'sort_by':sort_str, - 'shuffled':shuffled_str} - - # print summary only - if opts.summary : - sys.stdout.write(summary) - sys.exit(0) - - # write out the header cuz it's a nice thing to do - if not opts.no_header : - out_f.write(summary) - out_f.write('\t'.join(MACSOutput.FIELD_NAMES)+'\n') - - # write out records - if opts.shuffle : - shuffle(pass_recs) - out_recs = pass_recs[:num_recs] - - for rec in out_recs : - out_f.write('\t'.join(map(str,rec))+'\n') - - # good programming practice - out_f.close()
--- a/chipsequtil-master/scripts/filter_mapped_known_genes.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,98 +0,0 @@ -#!/usr/bin/env python - -import re -import sys - -from csv import reader, writer -from collections import defaultdict as dd -from optparse import OptionParser - -from chipsequtil.util import MultiLineHelpFormatter as MF - -usage = '%prog [options] <mapped known genes file>' -description = """Filter columns and rows from *join_mapped_known_genes.py* output which was \ -invoked with *--binary-plus* and *--field-types* flags. Specify full column names for either \ -binding or expression data with the *--bind-cols* and *--affy-cols* arguments, respectively. \ -The special fieldname *MAPPED* from *join_mapped_known_genes.py* is used to determine whether \ -a file contains a mapping for each gene. To filter genes by their associated binding or \ -expression data, specify *--bind-filter* or *--affy-filter* as follows: - - - *any* - report gene if at least one input file maps to the gene - - *all* - report gene if every input file maps to the gene - - *absent* - report gene if no input file maps to the gene - - *none* - do not filter genes at all (default) - -Results of binding and expression filters are 'and'ed together, e.g.: - ---bind-filter=all --affy-filter=absent - -returns only genes for which all binding files and none of the expression files map. -""" -epilog='Note: when specifying column names, be sure to escape characters like (,),&,*,etc... \ -that shells interpret with a \\, e.g. 
--bind-cols=-10\\*log10\\(pvalue\\)' -parser = OptionParser(usage=usage,description=description,epilog=epilog, formatter=MF()) -parser.add_option('--bind-cols',dest='bind_cols',default='',help='comma delimited list of binding data column names to include, [default: all]') -parser.add_option('--affy-cols',dest='affy_cols',default='',help='comma delimited list of expression data column names to include, [default: all]') -parser.add_option('--bind-filter',dest='bind_filt',type='choice',choices=['any','all','absent','none'],default='none',help='gene set to include based on binding data [default: %default]') -parser.add_option('--affy-filter',dest='affy_filt',type='choice',choices=['any','all','absent','none'],default='none',help='gene set to include based on expression data [default: %default]') -parser.add_option('--output',dest='output',default=None,help='write output to file') - - -def match_headers(patts,field) : - for p in patts : - if field.endswith(p) : return True - return False - -def filter_vector(type,vec) : - if type == 'any' : - return '1' in vec - elif type == 'all' : - return all([x=='1' for x in vec]) - elif type == 'absent' : - return not ('1' in vec) - else : - return True - -if __name__ == '__main__' : - - opts, args = parser.parse_args(sys.argv[1:]) - - if len(args) != 1 : - parser.error('Exactly one mapped file must be provided') - - map_fn = args[0] - - map_reader = reader(open(map_fn),delimiter='\t') - headers = map_reader.next() - bind_headers = [i for i,x in enumerate(headers) if x.startswith('BIND:')] - bind_map_headers = [i for i,x in enumerate(headers) if x.startswith('BIND:') and x.endswith('MAPPED')] - affy_headers = [i for i,x in enumerate(headers) if x.startswith('AFFY:')] - affy_map_headers = [i for i,x in enumerate(headers) if x.startswith('AFFY:') and x.endswith('MAPPED')] - - if len(bind_headers) == 0 and len(affy_headers) == 0 : - parser.error('No BIND: or AFFY: columns were found in the mapping, was *join_mapped_known_genes.py* run 
with the *--field-types* option?') - - # figure out which columns user wants - header_indices = [0,1] # always output knowngene and symbol - - bind_header_patts = opts.bind_cols.split(',') - header_indices += [i for i in bind_headers if match_headers(bind_header_patts,headers[i])] - - affy_header_patts = opts.affy_cols.split(',') - header_indices += [i for i in affy_headers if match_headers(affy_header_patts,headers[i])] - - out_f = open(opts.output,'w') if opts.output else sys.stdout - map_writer = writer(out_f,delimiter='\t') - - map_writer.writerow([headers[i] for i in header_indices]) - for rec in map_reader : - bind_vector = [rec[i] for i in bind_map_headers] - bind_pass = filter_vector(opts.bind_filt,bind_vector) - - affy_vector = [rec[i] for i in affy_map_headers] - affy_pass = filter_vector(opts.affy_filt,affy_vector) - - if bind_pass and affy_pass : - map_writer.writerow([rec[i] for i in header_indices]) - - if opts.output : out_f.close()
--- a/chipsequtil-master/scripts/generate_stats_doc.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,25 +0,0 @@ -#!/usr/bin/env python - -from matplotlib.pyplot import * - -from reStUtil import * - -if __name__ == '__main__' : - - # read stats - # - common read sequences - # - overall quality scores - - - # alignment stats - # - # alignments - # - uniquely aligned - # - multi reads - # - fail filter - # - alignments per chromosome bar chart - - - # peak stats - - - # motif stats and plots
--- a/chipsequtil-master/scripts/gerald_stats.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,113 +0,0 @@ -#!/usr/bin/env python - -import sys, re, os -from datetime import datetime -from optparse import OptionParser -from collections import defaultdict as dd -#from progressbar import ProgressBar -from csv import reader, writer - -from chipsequtil import get_file_parts -from chipsequtil.util import MultiLineHelpFormatter as MF -from reStUtil import ReStDocument, ReStSimpleTable - -usage = "%prog [options] <filename> [<filename>...]" -description="""\ -Outputs various stats about the GERALD formatted file(s) input. If multiple -files are provided statistics are aggregated according to the specified output -format. Output formats available via --format=X : - - # *python* - print an eval()'able python dictionary w/ counts - # *rst* - print statistics in a reStructured text table (default) - # *tab* - print statistics in a tab delimited form w/ header names - -Except for *python* format, each input file has its own output line. *python* -summarizes all alignments. 
-""" - -parser = OptionParser(usage=usage,description=description,formatter=MF()) -parser.add_option('--output',dest='output',default=None,help='write output to file [default: stdout]') -parser.add_option('--format',dest='format',type='choice',choices=['python','rst','tab'],default='rst',help='format to print out stats [default: %default]') - -def log(st) : - print datetime.now().isoformat()+' - '+st - -re_digits_nondigits = re.compile(r'\d+|\D+') -def format_with_commas(value,format='%s'): - parts = re_digits_nondigits.findall(format % (value,)) - for i in xrange(len(parts)): - s = parts[i] - if s.isdigit(): - parts[i] = _commafy(s) - break - return ''.join(parts) - -def _commafy(s): - - r = [] - for i, c in enumerate(reversed(s)): - if i and (not (i % 3)): - r.insert(0, ',') - r.insert(0, c) - return ''.join(r) - -if __name__ == '__main__' : - - opts,args = parser.parse_args(sys.argv[1:]) - - gerald_fns = args - - all_stats = dd(int) - stat_dicts = {} - stats_fields = ["sample", - "total alignments", - "% align unique", - "# reads aligned unique", - "% align repeat", - "# reads align repeat", - "% align none", - "# reads align none" - ] - - - data_rows = [] - for gerald_fn in gerald_fns : - stats = stat_dicts[gerald_fn] = dd(int) - - fnpath,fn,fnbase,fnext = get_file_parts(gerald_fn) - gerald_lines = reader(open(gerald_fn),delimiter='\t') - for row in gerald_lines : - m = re.match('^(\d+):(\d+):(\d+)$',row[10]) - if m is not None : - stats['multiread'] += 1 - all_stats['multiread'] += 1 - else : - stats[row[10]] += 1 - all_stats[row[10]] += 1 - - tot_reads = sum(stats.values())/1.-stats.get('QC',0) - unique_reads = sum([v for k,v in stats.items() if k.startswith('chr')]) - repeat_reads = stats.get('multiread',0) - nomap_reads = stats.get('NM',0) - data_row = [fn,format_with_commas(int(tot_reads)), - '%.1f'%(unique_reads/tot_reads*100),format_with_commas(unique_reads), - '%.1f'%(repeat_reads/tot_reads*100),format_with_commas(repeat_reads), - 
'%.1f'%(nomap_reads/tot_reads*100),format_with_commas(nomap_reads)] - - data_rows.append(data_row) - - out_f = open(opts.output,'w') if opts.output is not None else sys.stdout - - if opts.format == 'python' : - out_f.write(dict(all_stats)) - elif opts.format == 'rst' : - doc = ReStDocument(out_f) - table = ReStSimpleTable(header=stats_fields,data=data_rows) - doc.add(table) - doc.write() - elif opts.format == 'tab' : - out_w = writer(out_f,delimiter='\t') - out_w.writerow(stats_fields) - out_w.writerows(data_rows) - - if opts.output is not None : out_f.close()
--- a/chipsequtil-master/scripts/gerald_to_bed.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,82 +0,0 @@ -#!/usr/bin/env python - -import os -import re -import sys - -from optparse import OptionParser -from csv import DictReader, DictWriter -from chipsequtil import get_file_parts, GERALDOutput - -usage = "%prog [options] <GERALD file> [<GERALD file>...]" - -description = """\ -Convert the GERALD alignment formatted files into BED format. Input file named -<path>/<filename>.<ext> is translated into <path>/<filename>.bed unless --output -or --stdout is specified, in which case formatted lines are written to file or -standard output, respectively. If multiple input files are supplied with the ---output or --stdout option all formatted lines are concatenated together. -Formatting only occurs for GERALD input lines that have a valid Match Position -field (i.e. successfully aligned somewhere).""" - -parser = OptionParser(usage=usage, description=description) -parser.add_option('--output',dest='output',default=None,help='write all records to file') -parser.add_option('--stdout',dest='stdout',action='store_true',help='write out all formatted lines to stdout') -parser.add_option('--min-fields',dest='min_fields',action='store_true',help='only format the first three fields') -parser.add_option('--pass-only',dest='pass_only',action='store_true',help='only format lines with Y in the Pass Filtering field') -parser.add_option('--chromo-strip',dest='chromo_strip',default='.fa',help='pattern to remove from chromo field in BED output (e.g. 
--chromo-strip=.fa to remve .fa from chrX.fa) [default: %default]') - - - -if __name__ == '__main__' : - - opts,args = parser.parse_args(sys.argv[1:]) - - if len(args) == 0 : - parser.print_usage() - sys.exit(1) - - gerald_fns = args - - # step through the files - for gerald_fn in gerald_fns : - path,fn,fnbase,fnext = get_file_parts(gerald_fn) - bed_lines = [] - - - # where to write output to - if opts.stdout : - f_out = sys.stdout - else : - f_out = open(os.path.join(path,fnbase+'.bed'),'w') - - # process input - gerald_d = DictReader(open(gerald_fn),fieldnames=GERALDOutput.FIELD_NAMES,delimiter='\t') - for line_d in gerald_d : - if (opts.pass_only and line_d['filtering'] == 'Y' and line_d['match_pos'] != '') or (not opts.pass_only and line_d['match_pos'] != '') : - - if opts.chromo_strip is not None : - line_d['match_chromo'] = line_d['match_chromo'].replace(opts.chromo_strip,'') - - outline = [line_d['match_chromo'], # chromosome - line_d['match_pos'], # start - str(int(line_d['match_pos'])+len(line_d['read'])), # end - line_d['read'], # read - '0', # score - '+' if line_d['match_strand'] == 'F' else '-', # strand - '-', # thickStart - '-', # thickEnd - '0,0,255' if line_d['match_strand'] == 'F' else '255,0,0', # itemRgb - ] - outline = '\t'.join(outline) - f_out.write(outline+'\n') - #bed_lines.append(bed) - - # this is the slow way - #for line in open(gerld_fn) : - # grld = GERALDOutput(line) - # if (opts.pass_only and grld.filtering == 'Y' and grld.match_pos != '') or (not opts.pass_only and grld.match_pos != '') : - # bed = gerald_to_bed(grld,opts.min_fields) - # f_out.write(bed.output_format()) - # #bed_lines.append(bed) -
--- a/chipsequtil-master/scripts/integrate_macs_ucsc.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,112 +0,0 @@ -#!/usr/bin/env python - -import os -import sys -from optparse import OptionParser -from pypeline import Pypeline, ProcessPypeStep as PPS, PythonPypeStep as PyPS - -from chipsequtil import get_org_settings - -usage = "%prog <org> <stage dir> <stage url> <MACS wiggle directory>" -description = """Process a MACS wiggle directory when macs is invoked -with --wig option, convert all gzipped chromosome wiggle files to -bigWig format, copy to web staging directory <stage dir>, and create -track lines for adding to UCSC genome browser. Requires a <org> argument -that has a path using *org_settings.py <org> ucsc_chrom_sizes* that -points to a sizes file as created by UCSC's *fetchChromSizes <org>* -tool.""" - -parser = OptionParser(usage=usage,description=description) -parser.add_option('--auto',dest='auto',action='store_true',help='run all steps non-interactively (for batch mode, e.g.)') - -if __name__ == '__main__' : - - opts, args = parser.parse_args(sys.argv[1:]) - - if len(args) != 4 : - parser.error('Exactly four non-option arguments required') - - organism, stage_dir, stage_url, macs_dir = args - - pipeline = Pypeline('UCSC Integration',log='ucsc_integ.log') - - steps = [] - - org_settings = get_org_settings(organism) - - macs_path, macs_wiggle_path = os.path.dirname(macs_dir), os.path.basename(macs_dir) - macs_name = macs_wiggle_path.replace('_MACS_wiggle','') - wiggle_dir = macs_name+'_MACS_wiggle' - bigwig_fn = macs_name+'_%s_all_chr.bw' - d = {'wiggle_dir':macs_name+'_MACS_wiggle', - 'chrom_sizes':org_settings['ucsc_chrom_sizes'], - 'treat_bigwig_fn':macs_name+'_treat_all_chr.bw', - 'control_bigwig_fn':macs_name+'_control_all_chr.bw', - 'stage_dir':stage_dir, - 'stage_url':stage_url, - 'pwd':os.getcwd(), - } - - # create bigWig files - zcat_treat_call = "zcat %(wiggle_dir)s/treat/*.gz | " + \ - "grep -v '^track' | 
" + \ - "sed 's/\.fa//g' | " + \ - "wigToBigWig -clip stdin %(chrom_sizes)s " + \ - "%(wiggle_dir)s/treat/%(treat_bigwig_fn)s" - zcat_control_call = "zcat %(wiggle_dir)s/control/*.gz | " + \ - "grep -v '^track' | " + \ - "sed 's/\.fa//g' | " + \ - "wigToBigWig -clip stdin %(chrom_sizes)s " + \ - "%(wiggle_dir)s/control/%(control_bigwig_fn)s" - steps.append(PPS('Convert wig to bigWig',[zcat_treat_call%d,zcat_control_call%d])) - - # create the staging directory - mk_stage_dir_call = "mkdir -p %(stage_dir)s/%(wiggle_dir)s"%d - steps.append(PPS('Create staging directory',[mk_stage_dir_call])) - - # stage bigWig files to staging directory (create links) - stage_treat_call = "ln -fs %(pwd)s/%(wiggle_dir)s/treat/%(treat_bigwig_fn)s " + \ - "%(stage_dir)s/%(wiggle_dir)s/%(treat_bigwig_fn)s" - stage_control_call = "ln -fs %(pwd)s/%(wiggle_dir)s/control/%(control_bigwig_fn)s " + \ - "%(stage_dir)s/%(wiggle_dir)s/%(control_bigwig_fn)s" - steps.append(PPS('Stage bigWig files',[stage_treat_call%d,stage_control_call%d])) - - # generate track lines for treatment and control - treat_track_d = ['track', - 'type=bigWig', - 'name="Treatment"', - 'description="%s Treatment"'%macs_name, - 'bigDataUrl=%(stage_url)s/%(wiggle_dir)s/%(treat_bigwig_fn)s'%d] - treat_track = ' '.join(treat_track_d) - - control_track_d = ['track', - 'type=bigWig', - 'name="Control"', - 'description="%s Control"'%macs_name, - 'bigDataUrl=%(stage_url)s/%(wiggle_dir)s/%(control_bigwig_fn)s'%d] - control_track = ' '.join(control_track_d) - track_str = '\n'.join([treat_track, - control_track]) - - track_fn = wiggle_dir+'_tracks.txt' - def track_call(track_fn, track_str) : - f = open(track_fn,'w') - f.write(track_str+'\n') - f.close() - steps.append(PyPS('Generate track lines file',track_call, - callable_args=(track_fn,track_str)) - ) - - #calls = [zcat_treat_call, - # zcat_control_call, - # mk_stage_dir_call, - # stage_treat_call, - # stage_control_call, - # track_call - # ] - - #print calls - 
#steps.append(PPS('Stage Wiggle',calls)) - - pipeline.add_steps(steps) - pipeline.run(interactive=not opts.auto)
--- a/chipsequtil-master/scripts/join_mapped_known_genes.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,154 +0,0 @@ -#!/usr/bin/env python - -import sys -import warnings - -from csv import reader, writer -from collections import defaultdict as dd -from optparse import OptionParser - -usage = '%prog -b <mapped DNA binding file>|-a <mapped microarray file> [-b <mapped DNA binding file> ...] [-a <mapped microarray file> ...]' -description = """Join all files on the first column, concatenating records with \ -matching entries onto one line per entry. Understands DNA binding data as mapped \ -with *map_peaks_to_known_genes.py* utility microarray data as mapped by \ -*probeset_to_known_genes.py* utility, passed to program using *-b* and *-a* options \ -respectively. If a file contains more than one mapping to a gene additional columns \ -are added. At least one file of either type is required. Field names are written as \ -<filename>.<original field name>.<map number> -""" -epilog="Note: microarray files should have been created by bioconductor, and all files should have a row of fieldnames as the first line" -parser = OptionParser(usage=usage,description=description,epilog=epilog) -parser.add_option('-a','--affy-file',dest='affy_file',action='append',default=[],help='add a mapped microarray file') -parser.add_option('-b','--bind-file',dest='bind_file',action='append',default=[],help='add a mapped DNA binding file (e.g. 
MACS, BED)') -#parser.add_option('-b','--bed-file',dest='bed_file',action='append',default=[],help='add a mapped BED formatted peaks file') -parser.add_option('-m','--macs-file',dest='macs_file',action='append',default=[],help='DEPRECATED: use -b instead, add a mapped default MACS formatted peaks (*.xls) file') -parser.add_option('--output',dest='output',default=None,help='file to output joined records to [default: stdout]') -#parser.add_option('--intersect',dest='intersect',action='store_true',help='only output records common to all file passed in') -parser.add_option('--first-only',dest='first_only',action='store_true',help='only output the first mapping to a gene from each file') -parser.add_option('--binary',dest='binary',action='store_true',help='output only one column per file with a 0 or 1 to indicate whether a mapping exists in that file') -parser.add_option('--binary-plus',dest='binary_plus',action='store_true',help='output one column per file with a 0 or 1 to indicate whether a mapping exists in that file in addition to all other columns') -parser.add_option('--field-types',dest='field_types',action='store_true',help='prepend BIND or AFFY to the beginning of all appropriate columns') -#parser.add_option('--symbols',dest='symbols',action='store_true',help='mapped files contain symbols in second column (per map_peaks_to_known_genes.py|probeset_to_known_gene.py --symbol-xref option)') - -if __name__ == '__main__' : - - opts,args = parser.parse_args(sys.argv[1:]) - - if len(args) > 0 : - parser.error('There were non-option command line arguments passed, all files should have a preceeding option indicating filetype') - - if len(opts.macs_file) != 0 : - warnings.warn('The -m option is deprecated, please replace these flags with -b instead. 
Adding MACS filenames to binding filename list.',DeprecationWarning) - opts.bind_file.extend(opts.macs_file) - - if len(opts.bind_file) == 0 and len(opts.affy_file) == 0 : - parser.error('No files were passed in, aborting') - - # union of all genes - all_genes = set() - - # TODO - fix intersect w/ binary - opts.intersect = False - - # TODO - actually make this an option, or the default - opts.symbols = True - if opts.symbols : - symbol_map = {} - - # read all the files in - def get_file_dict(fns,header_prefix='') : - file_map = dd(lambda: dd(list)) - out_fieldnames = [] - blank_entry = [] - for fn in fns : - max_maps = 0 - f = reader(open(fn),delimiter='\t') - #f = open(fn) - fieldnames = f.next() - fieldnames = fieldnames[2:] # we don't want existing knownGeneID or geneSymbol - # read in the data, create a dictionary - for l in f : - if opts.symbols : - gene, symbol, data = l[0],l[1],l[2:] - symbol_map[gene] = symbol - else : - gene, data = l.split('\t',1) - file_map[fn][gene].append(data) - max_maps = max(max_maps,len(file_map[fn][gene])) - all_genes.add(gene) - - # if we're adding a binary column, do it - if opts.binary_plus : - out_fieldnames.append(header_prefix+fn+'.MAPPED') - - # construct the fieldnames for this file - for i in range(max_maps) : - out_fieldnames.extend(['%s%s.%d.%s'%(header_prefix,fn,i,h) for h in fieldnames]) - - # pad out data entries w/ fewer than max_maps - for gene,data in file_map[fn].items() : - while len(data) < max_maps : - data.append(['']*len(fieldnames)) - file_map[fn]['blank'] = [['']*len(fieldnames) for _ in range(max_maps)] - return file_map,out_fieldnames - - #macs_file_map, macs_fieldnames = get_file_dict(opts.macs_file) - #bed_file_map, bed_fieldnames = get_file_dict(opts.bed_file) - bind_prefix = 'BIND:' if opts.field_types else '' - affy_prefix = 'AFFY:' if opts.field_types else '' - bind_file_map, bind_fieldnames = get_file_dict(opts.bind_file,bind_prefix) - affy_file_map, affy_fieldnames = 
get_file_dict(opts.affy_file,affy_prefix) - - # prepare output objects - out_f = open(opts.output,'w') if opts.output else sys.stdout - map_fieldnames = ['knownGeneID'] - if opts.symbols : - map_fieldnames.append('geneSymbol') - #all_fieldnames = map_fieldnames+macs_fieldnames+bed_fieldnames+affy_fieldnames - all_fieldnames = map_fieldnames+bind_fieldnames+affy_fieldnames - if opts.binary : - #all_fieldnames = map_fieldnames+opts.macs_file+opts.bed_file+opts.affy_file - all_fieldnames = [x+'.MAPPED' for x in map_fieldnames+opts.bind_file+opts.affy_file] - join_writer = writer(out_f,delimiter='\t') - join_writer.writerow(all_fieldnames) - - # go through all the genes and print out lines - for gene in all_genes : - gene_line = [gene] - if opts.symbols : - gene_line.append(symbol_map[gene]) - #for filetype_data,fns in zip([macs_file_map,bed_file_map,affy_file_map],[opts.macs_file,opts.bed_file,opts.affy_file]) : - for filetype_data,fns in zip([bind_file_map,affy_file_map],[opts.bind_file,opts.affy_file]) : - for fn,recs in [(fn,filetype_data[fn]) for fn in fns] : - #for fn,recs in d.items() : - if recs.has_key(gene) : - # only output the first entry - if opts.first_only : - gene_line.extend(recs[gene][0]) - # only output a 1 or a zero - elif opts.binary : - gene_line.extend('1') - # else output normally - else : - # add binary column in addition to other output - if opts.binary_plus : - gene_line.extend('1') - for rec in recs[gene] : - gene_line.extend(rec) - else : - # if intersecting, ignore this gene - if opts.intersect : - continue - elif opts.binary : - gene_line.extend('0') - else : - # add binary column in addition to other output - if opts.binary_plus : - gene_line.extend('0') - for blank in filetype_data[fn]['blank'] : - #print len(blank) - gene_line.extend(blank) - #print fn, gene_line[2], len(gene_line), gene_line - join_writer.writerow(gene_line) - - if opts.output : out_f.close()
--- a/chipsequtil-master/scripts/kg_to_gff.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,78 +0,0 @@ -#!/usr/bin/env python - -import os -import sys -from csv import DictReader, DictWriter, QUOTE_NONE -from optparse import OptionParser - -from chipsequtil import KnownGeneFile, get_file_parts - -#args = ['/nfs/genomes/mouse_gp_jul_07/anno/knownGene-2010-07-08.txt','/nfs/genomes/mouse_gp_jul_07/anno/kgXref-2010-07-08.txt'] -args = ['/nfs/genomes/mouse_gp_jul_07/anno/knownGene-2010-08-03.gtf','/nfs/genomes/mouse_gp_jul_07/anno/kgXref-2010-07-08.txt'] -usage = '%prog <knownGene annotation>' -description = 'convert a UCSC knownGene annotation to GFF' -parser = OptionParser(usage=usage,description=description) - - -if __name__ == '__main__' : - - opts, args = parser.parse_args(args) - - kg_path,kg_fn,kg_base,kg_ext = get_file_parts(args[0]) - #kg_f = KnownGeneFile(args[0]) - - # xref for finding gene symbols - kgXref_fn = args[1] - kgXref_fieldnames = ['kgID','mRNA','spID','spDisplayID','geneSymbol','refseq','proAcc','description'] - xref_map = dict([(x['kgID'],x) for x in DictReader(open(kgXref_fn),delimiter='\t',fieldnames=kgXref_fieldnames)]) - - gff_headers = ['seqname','source','feature','start','end','score','strand','frame','attributes'] - gff_reader = DictReader(open(args[0]),delimiter='\t',fieldnames=gff_headers) - gff_writer = DictWriter(sys.stdout,delimiter='\t',fieldnames=gff_headers,quotechar='',quoting=QUOTE_NONE,lineterminator='\n') - #gff_writer.writerow(dict([(x,x) for x in gff_headers])) - - for i,rec in enumerate(gff_reader) : - #d = {} - #d['seqname'] = rec['chrom'] - #d['source'] = 'UCSC_knownGene' - #d['feature'] = 'gene' - #d['start'] = rec['txStart'] - #d['end'] = rec['txEnd'] - #d['score'] = '.' - #d['strand'] = rec['strand'] - #d['frame'] = '.' 
- #gene_name = rec['name'] - - gff_attrs_lst = [x.strip() for x in rec['attributes'].split(';')][:-1] - gff_attrs = {} - for attr in gff_attrs_lst : - k,v = attr.split(' ',1) - gff_attrs[k] = eval(v) - - kg_name = gff_attrs['gene_id'] - - # try to find a gene symbol - gene_id = xref_map[kg_name].get('geneSymbol',None) - #gene_id = kg_name - #if gene_id is None : - # gene_id = xref_map[kg_name].get('mRNA',None) - #if gene_id is None : - # gene_id = xref_map[kg_name].get('refseq',None) - if gene_id is None : # I give up - gene_id = kg_name - - gff_attrs_lst += ['gene_name "%s"'%gene_id] - rec['attributes'] = '; '.join(gff_attrs_lst) - gff_writer.writerow(rec) - - # now write the exons - #d['feature'] = 'exon' - #for j,(st,en) in enumerate(zip(rec['exonStarts'],rec['exonEnds'])) : - # d['start'] = st - # d['end'] = en - # d['attributes'] = '; '.join(['gene_id "%s"'%gene_id,'transcript_id "%s"'%rec['name'],'exon_number "%d"'%(j+1),'ID "%s.exon_%d"'%(rec['name'],j),'PARENT "%s"'%rec['name']]) - # gff_writer.writerow(d) - - - # version with knownGene in gene_name - # version with symbol in gene_name
--- a/chipsequtil-master/scripts/map_intervals.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,126 +0,0 @@ -#!/usr/bin/env python - -import sys - -from collections import defaultdict -from csv import reader -from optparse import OptionParser - -from bx.intervals.intersection import IntervalTree, Interval - -usage = '%prog [options] <from> <to>' -description = """Find records in <to> interval file that map to records in -<from> interval file. Files should be tab delimited and are expected to have -a chromosome column, a start column, and an end column. The indices of these -columns can be specified on the command line but by default are the first -three columns, respectively. Prints out to stdout by default one new line -separated row per row in <from> with a line from <to> where there is a mapping. -If no mapping is found (e.g. when specifying a maximum margin to search within) -the word None is printed. By default only prints nearest record, with ties -settled by smallest line number in <to>.""" -parser = OptionParser(usage=usage,description=description) -parser.add_option('-w','--window',dest='window',type="float",nargs=2, - default=(1e9,1e9), - help="window as <int upstream> <int downstream> to search for intervals [default: %default]") -parser.add_option('-f','--from',dest='from_ind',type="int",nargs=3, - default=(0,1,2), - help="coordinates of chromosome, start, stop in <from> file") -parser.add_option('-i','--skip-from-header',dest='skip_fh',action='store_true', - help="<from> has a header that should be skipped") -parser.add_option('-t','--to',dest='to_ind',type="int",nargs=3, - default=(0,1,2), - help="coordinates of chromosome, start, stop in <to> file") -parser.add_option('-j','--skip-to-header',dest='skip_th',action='store_true', - help="<to> has a header that should be skipped") - -if __name__ == '__main__' : - - opts, args = parser.parse_args(sys.argv[1:]) - - if len(args) != 2 : - parser.error('Exactly 2 non-option 
arguments are required') - - from_fn, to_fn = args - - chr_trees = defaultdict(IntervalTree) - chr_sizes = defaultdict(lambda : dict(minstart=sys.maxint,maxend=0)) - - if any([x > 1e9 for x in opts.window]) : - parser.error('Window maximum is +/- 1e9') - - to_reader = reader(open(to_fn),delimiter='\t') - if opts.skip_th : - to_header = to_reader.next() - - to_chr, to_st, to_en = opts.to_ind - for r in to_reader : - i = Interval(int(r[to_st]), - int(r[to_en]), - value=r, - chrom=r[to_chr] - ) - chr_trees[r[to_chr]].insert_interval(i) - chr_sizes[r[to_chr]]['minstart'] = min(int(r[to_st]),chr_sizes[r[to_chr]]['minstart']) - chr_sizes[r[to_chr]]['maxend'] = max(int(r[to_st]),chr_sizes[r[to_chr]]['maxend']) - - # window default is 1e9 because no chromosome is more than - # ten billion base pairs, right?! - def find_nearest(t,s,e,window=(1e9,1e9)) : - - # look for record within intervals - inside = t.find(s,e) - - if len(inside) >= 1 : # pick the first one, list returned is sorted - return inside[0] - - i = Interval(s,e) - before = t.upstream_of_interval(i,max_dist=window[0]) - after = t.downstream_of_interval(i,max_dist=window[1]) - - before = before[0] if len(before) != 0 else None - after = after[0] if len(after) != 0 else None - - if before and after : - b_dist = min(abs(before.end-s),abs(e-before.start)) - a_dist = min(abs(after.end-s),abs(e-after.start)) - nearest = before if b_dist < a_dist else after - elif before : - nearest = before - elif after : - nearest = after - else : - nearest = None - return nearest - - # now go through the from file - from_reader = reader(open(from_fn),delimiter='\t') - if opts.skip_fh : from_reader.next() - - from_chr, from_st, from_en = opts.from_ind - if opts.skip_th : - print '\t'.join(to_header) - for r in from_reader : - t = find_nearest(chr_trees[r[from_chr]],int(r[from_st]),int(r[from_en]), - window=opts.window) - if t : - print '\t'.join(t.value) - else : - print t - """ - # tests - print 'interval is before any other 
interval in tree' - t = find_nearest(chr_trees['chr2'],10388500,10388510) - print '\tCorrect answer: %s, Returned answer: %s'%('mmu-mir-466f-1',t.value),t - print 'interval is after any other interval in tree' - t = find_nearest(chr_trees['chr1'],200000000,200000010) - print '\tCorrect answer: %s, Returned answer: %s'%('mmu-mir-29c',t.value),t - print 'interval is between intervals' - t = find_nearest(chr_trees['chr3'],89773941,89774021) - print '\tCorrect answer: %s, Returned answer: %s'%('mmu-mir-190b',t.value),t - print 'interval is inside another interval' - t = find_nearest(chr_trees['chr3'],89873999,89874001) - print '\tCorrect answer: %s, Returned answer: %s'%('mmu-mir-190b',t.value), t - print 'interval is too far from anything to return anything' - t = find_nearest(chr_trees['chr3'],89773941,89774021,window=10) - print '\tCorrect answer: None, Returned answer: %s'%t - """
--- a/chipsequtil-master/scripts/map_peaks_to_genes.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,202 +0,0 @@ -#!/usr/bin/env python - -import sys, os -from optparse import OptionParser -from collections import defaultdict as dd -from chipsequtil import MACSOutput, BEDOutput, RefGeneOutput, parse_number -from csv import DictReader, DictWriter - -usage = '%prog [options] <refGene file> <peaks file>' -description = """ -Map the peaks in <peaks file> to genes in <refGene file>. <refGene file> is -format is as specified in http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/refGene.sql. -<peaks file> format is as produced by MACS.""" -epilog = '' -parser = OptionParser(usage=usage,description=description,epilog=epilog) -parser.add_option('--upstream-window',dest='upst_win',type='int',default=5500,help='window width in base pairs to consider promoter region [default: %default]') -parser.add_option('--downstream-window',dest='dnst_win',type='int',default=2500,help='window width in base pairs to consider downstream region [default: %default]') -parser.add_option('--map-output',dest='peak_output',default=sys.stdout,help='filename to output mapped peaks in BED format to [default: stdout]') -parser.add_option('--stats-output',dest='stats_output',default=sys.stderr,help='filename to output summary stats in conversion [default: stderr]') -parser.add_option('--peaks-format',dest='peaks_fmt',default='MACS',type='choice',choices=['MACS','BED'],help='format of peaks input file [default: %default]') - -# TODO - options -#parser.add_option('--use-cds',dest='use_cds',action='store_true',help='use cdsStart and cdsEnd fields instead of txStart and txEnd to do mapping') -#parser.add_option('--capture-intergenic'...) 
-#parser.add_option('--map-format',dest='peak_format',type='choice',choices=['default','BED'],help='format of peak output [default: %default]') -#parser.add_option('--stats-format',dest='stats_format',type='choice',choices=['human','python'],help='format of summary stats output [default: %default]') - -def parse_gene_ref(ref_gene) : - #FIXME - maybe, if galaxy doesn't work out, figure out how to deal with multiple RefGene mapping formats? - fieldnames = ['geneName','name','chrom','strand','txStart','txEnd','cdsStart','cdsEnd','exonCount','exonStarts','exonEnds'] - reader = DictReader(ref_gene,fieldnames=fieldnames,delimiter='\t') - gene_ref = dd(list) - for ref_dict in reader : - for k,v in ref_dict.items() : - # coerce numbers where possible - ref_dict[k] = parse_number(v) - - # turn 'x,x,x,...' into a list - ref_dict['exonStarts'] = [parse_number(x) for x in ref_dict['exonStarts'].split(',')] - if ref_dict['exonStarts'][-1] == '' : ref_dict['exonStarts'].remove('') - ref_dict['exonEnds'] = [parse_number(x) for x in ref_dict['exonEnds'].split(',')] - if ref_dict['exonEnds'][-1] == '' : ref_dict['exonEnds'].remove('') - - gene_ref[ref_dict['chrom']].append(ref_dict) - - return gene_ref - - -if __name__ == '__main__' : - - opts, args = parser.parse_args(sys.argv[1:]) - - if len(args) < 2 : - parser.error('Must provide two filename arguments') - - gene_ref = parse_gene_ref(open(args[0])) - if opts.peaks_fmt == 'MACS' : - fieldnames = MACSOutput.FIELD_NAMES - chr_field, start_field, end_field = 'chr', 'start', 'end' - elif opts.peaks_fmt == 'BED' : - fieldnames = BEDOutput.FIELD_NAMES - chr_field, start_field, end_field = 'chrom', 'chromStart', 'chromEnd' - else : - fieldnames = [] - - peaks_reader = DictReader(open(args[1]),fieldnames=fieldnames,delimiter='\t') - - # default output format: - # <chromo> <peak loc> <accession #> <gene symbol> <strand> <map type> <map subtype> <score> <dist from feature> - # score = (peak-TSS)/(TSE-TSS) - peak distance from TSS as 
fraction of length of gene - output_fields = ['chromo', - 'peak loc', - 'accession #', - 'gene symbol', - 'strand', - 'map type', - 'map subtype', - 'score', - 'dist from feature', - ] - if opts.peak_output != sys.stdout : - opts.peak_output = open(opts.peak_output,'w') - peaks_writer = DictWriter(opts.peak_output,output_fields,delimiter='\t',lineterminator='\n') - unique_genes = set() - map_stats = dd(int) - for peak in peaks_reader : - - # if this is a comment or header line get skip it - if peak[fieldnames[0]].startswith('#') or \ - peak[fieldnames[0]] == fieldnames[0] or \ - peak[fieldnames[0]].startswith('track') : continue - - # coerce values to numeric if possible - for k,v in peak.items() : peak[k] = parse_number(v) - - # peak assumed to be in the middle of the reported peak range - peak_loc = (peak[start_field]+peak[end_field])/2 - - chrom_genes = gene_ref[peak[chr_field]] - - if len(chrom_genes) == 0 : - sys.stderr.write('WARNING: peak chromosome %s not found in gene reference, skipping: %s\n'%(peak[chr_field],peak)) - continue - - mapped = False - - # walk through the genes for this chromosome - for gene in chrom_genes : - - # reusable dictionary for output - out_d = {}.fromkeys(output_fields,0) - out_d['map type'] = '' - out_d['chromo'] = peak[chr_field] - out_d['peak loc'] = peak_loc - - # determine intervals for promoter, gene, and downstream - if gene['strand'] == '+' : - promoter_coords = max(gene['txStart']-1-opts.upst_win,0), gene['txStart']-1 - gene_coords = gene['txStart'], gene['txEnd'] - downstream_coords = gene['txEnd']+1, gene['txEnd']+1+opts.dnst_win - else : - promoter_coords = gene['txEnd']+1, gene['txEnd']+1+opts.upst_win # +1 because we're using 1 based indexing - gene_coords = gene['txStart'], gene['txEnd'] - downstream_coords = gene['txStart']-1-opts.dnst_win, gene['txStart']-1 # -1 because we're using 1 based indexing - - # check for promoter - if peak_loc >= promoter_coords[0] and peak_loc <= promoter_coords[1] : - out_d['map type'] 
= 'promoter' - out_d['dist from feature'] = peak_loc - promoter_coords[1] if gene['strand'] == '+' else promoter_coords[0] - peak_loc - - # check for gene - elif peak_loc >= gene_coords[0] and peak_loc <= gene_coords[1] : - # check for intron/exon - exon_coords = zip(gene['exonStarts'],gene['exonEnds']) - in_exon = False - for st,en in exon_coords : - if peak_loc >= st and peak_loc <= en : - in_exon = True - break - out_d['map type'] = 'gene' - out_d['map subtype'] = 'exon' if in_exon else 'intron' - - # score = (peak-TSS)/(TSE-TSS) - peak distance from TSS as fraction of length of gene - gene_len = float(gene_coords[1]-gene_coords[0]) - out_d['score'] = (peak_loc-gene_coords[0])/gene_len if gene['strand'] == '+' else (gene_coords[1]-peak_loc)/gene_len - - # distance calculated from start of gene - out_d['dist from feature'] = peak_loc - promoter_coords[1] if gene['strand'] == '+' else promoter_coords[0] - peak_loc - - map_stats[out_d['map subtype']] += 1 - - # check for downstream - elif peak_loc >= downstream_coords[0] and peak_loc <= downstream_coords[1] : - out_d['map type'] = 'after' - out_d['dist from feature'] = peak_loc - downstream_coords[0] if gene['strand'] == '+' else downstream_coords[1] - peak_loc - - # does not map to this gene - else : - pass - - # map type is not blank if we mapped to something - if out_d['map type'] != '' : - - out_d['accession #'] = gene['name'] - out_d['gene symbol'] = gene['geneName'] - out_d['strand'] = gene['strand'] - - map_stats[out_d['map type']] += 1 - peaks_writer.writerow(out_d) - - unique_genes.add(gene['name']) - mapped = True - - """ - print 'Peak:',peak - print 'Gene:',gene - print 'Peak loc:',peak_loc - print promoter_coords - print gene_coords - print downstream_coords - raw_input('Wait for it...') - """ - - # reset map_type - out_d['map type'] = '' - - if not mapped : - #out_d['map type'] = 'intergenic' - #peaks_writer.writerow(out_d) - map_stats['intergenic'] += 1 - - if opts.peak_output != sys.stdout : - 
opts.peak_output.close() - - if opts.stats_output != sys.stderr : - opts.stats_output = open(opts.stats_output,'w') - - for k,v in map_stats.items() : - opts.stats_output.write('%s: %s\n'%(k,v)) - - if opts.stats_output != sys.stderr : - opts.stats_output.close()
--- a/chipsequtil-master/scripts/map_peaks_to_known_genes.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,233 +0,0 @@ -#!/usr/bin/env python - -import sys, os -from optparse import OptionParser -from collections import defaultdict as dd -from csv import DictReader, DictWriter - -from chipsequtil import MACSFile, BEDFile, KnownGeneFile, parse_number -from chipsequtil.util import MultiLineHelpFormatter - -usage = '%prog [options] <knownGene file> <knownGene xRef file> <peaks file>' -description = """ -Map the peaks in <peaks file> to genes in <knownGene file>. <knownGene file> is\ -format is as specified in http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/knownGene.sql.\ -<peaks file> format is as produced by MACS. If *auto* is chosen (default) file extension \ -is examined for *.xls* for default MACS format or *.bed* for BED format. If the --detail\ -option is provided, the following extra fields are appended to each row: - -peak loc, dist from feature, score, map type, map subtype -""" -epilog = '' -parser = OptionParser(usage=usage,description=description,epilog=epilog,formatter=MultiLineHelpFormatter()) -parser.add_option('--upstream-window',dest='upst_win',type='int',default=5500,help='window width in base pairs to consider promoter region [default: %default]') -parser.add_option('--downstream-window',dest='dnst_win',type='int',default=2500,help='window width in base pairs to consider downstream region [default: %default]') -parser.add_option('--tss',dest='tss',action='store_true',help='calculate downstream window from transcription start site instead of transcription end site') -parser.add_option('--map-output',dest='peak_output',default=None,help='filename to output mapped peaks to [default: stdout]') -parser.add_option('--stats-output',dest='stats_output',default=sys.stderr,help='filename to output summary stats in conversion [default: stderr]') 
-parser.add_option('--peaks-format',dest='peaks_fmt',default='auto',type='choice',choices=['auto','MACS','BED'],help='format of peaks input file [default: %default]') -parser.add_option('--detail',dest='detail',action='store_true',help='add extra fields to output, see description') -parser.add_option('--intergenic',dest='intergenic',action='store_true',help='write intergenic peaks to the gene file as well with None as gene ID') -#parser.add_option('--symbol-xref',dest='symbol_xref',default=None,help='use the kgXref table file supplied to find a gene symbol, output as second column') - -# TODO - options -#parser.add_option('--use-cds',dest='use_cds',action='store_true',help='use cdsStart and cdsEnd fields instead of txStart and txEnd to do mapping') -#parser.add_option('--capture-intergenic'...) -#parser.add_option('--map-format',dest='peak_format',type='choice',choices=['default','BED'],help='format of peak output [default: %default]') -#parser.add_option('--stats-format',dest='stats_format',type='choice',choices=['human','python'],help='format of summary stats output [default: %default]') - -def parse_gene_ref(ref_gene) : - reader = KnownGeneFile(ref_gene) - gene_ref = dd(list) - for ref_dict in reader : - gene_ref[ref_dict['chrom']].append(ref_dict) - - return gene_ref - -def parse_gene_ref_line(l) : - l = map(parse_number, l) # coerce to numbers where possible - l[9] = map(parse_number, l[9].split(',')) # turn 'x,x,x,...' 
into list - l[10] = map(parse_number, l[10].split(',')) - return l - -if __name__ == '__main__' : - - opts, args = parser.parse_args(sys.argv[1:]) - - if len(args) < 3 : - parser.error('Must provide three filename arguments') - - gene_ref = parse_gene_ref(args[0]) - xref_fn = args[1] - peaks_fn = args[2] - if opts.peaks_fmt == 'auto' : - path,ext = os.path.splitext(peaks_fn) - if ext.lower() == '.xls' : - opts.peaks_fmt = 'MACS' - elif ext.lower() == '.bed' : - opts.peaks_fmt = 'BED' - else : - parser.error('Could not guess peaks file format by extension (%s), aborting'%ext) - - if opts.peaks_fmt == 'MACS' : - peaks_reader_cls = MACSFile - chr_field, start_field, end_field = 'chr', 'start', 'end' - elif opts.peaks_fmt == 'BED' : - peaks_reader_cls = BEDFile - chr_field, start_field, end_field = 'chrom', 'chromStart', 'chromEnd' - else : - # should never happen - fieldnames = [] - - #peaks_reader = DictReader(open(args[1]),fieldnames=fieldnames,delimiter='\t') - peaks_reader = peaks_reader_cls(peaks_fn) - - # default output format: - if opts.peak_output : - peak_output = open(opts.peak_output,'w') - else : - peak_output = sys.stdout - - fieldnames = peaks_reader.FIELD_NAMES - if opts.detail : - fieldnames += ["peak loc","dist from feature","score","map type","map subtype"] - output_fields = ['knownGeneID']+fieldnames - - # see if the user wants gene symbols too - # TODO - actually make this an option, or make it required - opts.symbol_xref = xref_fn - if opts.symbol_xref : - kgXref_fieldnames = ['kgID','mRNA','spID','spDisplayID','geneSymbol','refseq','protAcc','description'] - symbol_xref_reader = DictReader(open(opts.symbol_xref),fieldnames=kgXref_fieldnames,delimiter='\t') - symbol_xref_map = {} - for rec in symbol_xref_reader : - symbol_xref_map[rec['kgID']] = rec - output_fields = ['knownGeneID','geneSymbol']+fieldnames - - peaks_writer = DictWriter(peak_output,output_fields,delimiter='\t',extrasaction='ignore',lineterminator='\n') - 
peaks_writer.writerow(dict([(k,k) for k in output_fields])) - unique_genes = set() - map_stats = dd(int) - for peak in peaks_reader : - - # if this is a comment or header line get skip it - if peak[fieldnames[0]].startswith('#') or \ - peak[fieldnames[0]] == fieldnames[0] or \ - peak[fieldnames[0]].startswith('track') : continue - - # coerce values to numeric if possible - for k,v in peak.items() : peak[k] = parse_number(v) - - # MACS output gives us summit - if opts.peaks_fmt == 'MACS' : - peak_loc = peak[start_field]+peak['summit'] - else : # peak assumed to be in the middle of the reported peak range - peak_loc = (peak[start_field]+peak[end_field])/2 - - chrom_genes = gene_ref[peak[chr_field]] - - if len(chrom_genes) == 0 : - sys.stderr.write('WARNING: peak chromosome %s not found in gene reference, skipping: %s\n'%(peak[chr_field],peak)) - continue - - mapped = False - - # walk through the genes for this chromosome - for gene in chrom_genes : - - # reusable dictionary for output - out_d = {}.fromkeys(output_fields,0) - out_d.update(peak) - out_d['map type'] = '' - out_d['chromo'] = peak[chr_field] - out_d['peak loc'] = peak_loc - - # determine intervals for promoter, gene, and downstream - if gene['strand'] == '+' : - promoter_coords = max(gene['txStart']-1-opts.upst_win,0), gene['txStart']-1 - if opts.tss : - gene_coords = gene['txStart'], min(gene['txEnd'],gene['txStart']+opts.dnst_win) - downstream_coords = gene['txEnd']+1,gene['txStart']+opts.dnst_win - else : - gene_coords = gene['txStart'], gene['txEnd'] - downstream_coords = gene['txEnd']+1, gene['txEnd']+1+opts.dnst_win - else : - promoter_coords = gene['txEnd']+1, gene['txEnd']+1+opts.upst_win # +1 because we're using 1 based indexing - if opts.tss : - gene_coords = max(gene['txStart'],gene['txEnd']-opts.upst_win), gene['txEnd'] - downstream_coords = gene['txEnd']-1-opts.dnst_win, gene['txStart']-1 # -1 because we're using 1 based indexing - else : - gene_coords = gene['txStart'], gene['txEnd'] - 
downstream_coords = gene['txStart']-1-opts.dnst_win, gene['txStart']-1 # -1 because we're using 1 based indexing - - # check for promoter - if peak_loc >= promoter_coords[0] and peak_loc <= promoter_coords[1] : - out_d['map type'] = 'promoter' - out_d['dist from feature'] = peak_loc - promoter_coords[1] if gene['strand'] == '+' else promoter_coords[0] - peak_loc - - # check for gene - elif peak_loc >= gene_coords[0] and peak_loc <= gene_coords[1] : - # check for intron/exon - exon_coords = zip(gene['exonStarts'],gene['exonEnds']) - in_exon = False - for st,en in exon_coords : - if peak_loc >= st and peak_loc <= en : - in_exon = True - break - out_d['map type'] = 'gene' - out_d['map subtype'] = 'exon' if in_exon else 'intron' - - # score = (peak-TSS)/(TSE-TSS) - peak distance from TSS as fraction of length of gene - gene_len = float(gene_coords[1]-gene_coords[0]) - out_d['score'] = (peak_loc-gene_coords[0])/gene_len if gene['strand'] == '+' else (gene_coords[1]-peak_loc)/gene_len - - # distance calculated from start of gene - out_d['dist from feature'] = peak_loc - promoter_coords[1] if gene['strand'] == '+' else promoter_coords[0] - peak_loc - - map_stats[out_d['map subtype']] += 1 - - # check for downstream - elif peak_loc >= downstream_coords[0] and peak_loc <= downstream_coords[1] : - out_d['map type'] = 'after' - if opts.tss : - out_d['dist from feature'] = peak_loc - gene_coords[0] if gene['strand'] == '+' else gene_coords[1] - peak_loc - else : - out_d['dist from feature'] = peak_loc - downstream_coords[0] if gene['strand'] == '+' else downstream_coords[1] - peak_loc - - # does not map to this gene - else : - pass - - # map type is not blank if we mapped to something - if out_d['map type'] != '' : - - #out_d = {'knownGeneID':gene['name']} - out_d['knownGeneID'] = gene['name'] - if opts.symbol_xref : - out_d['geneSymbol'] = symbol_xref_map[gene['name']]['geneSymbol'] - peaks_writer.writerow(out_d) - - mapped = True - - # reset map_type - out_d['map type'] = '' 
- - if not mapped : - if opts.intergenic : - out_d['knownGeneID'] = 'None' - out_d['geneSymbol'] = 'None' - out_d['map type'] = 'intergenic' - peaks_writer.writerow(out_d) - map_stats['intergenic'] += 1 - - if peak_output != sys.stdout : - peak_output.close() - - #if opts.stats_output != sys.stderr : - # opts.stats_output = open(opts.stats_output,'w') - - #for k,v in map_stats.items() : - # opts.stats_output.write('%s: %s\n'%(k,v)) - - #if opts.stats_output != sys.stderr : - # opts.stats_output.close()
--- a/chipsequtil-master/scripts/motif_scan.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,330 +0,0 @@ -#!/usr/bin/env python - -import matplotlib -matplotlib.use('AGG') - -import numpy as np -import os -import random -import string -import sys - -from math import log, pow -import matplotlib.pyplot as mp -from multiprocessing import Pool -from optparse import OptionParser -from scipy.stats.stats import pearsonr - -from chipsequtil import MACSFile, get_org_settings -from chipsequtil.nib import NibDB -from chipsequtil.sampling import rejection_sample_bg -from TAMO import MotifTools as mt -from TAMO.MotifTools import load - -usage = "%prog [options] <org> <peaks fn> <TAMO motif fn>" -desc = "Do some motif scanning stuffs" -parser = OptionParser(usage=usage,description=desc) - -parser.add_option('-n','--top-n',dest='top_n',type='int',default=None, - help='use top n peaks by pvalue for sequence scanning [default: all]') -parser.add_option('-i','--motif-indices',dest='motif_ind',default='all', - help='which indices from <TAMO motif fn> to use [default: %default]') -parser.add_option('-d','--dir',dest='dir',default='motif_results', - help='write all results into this directory') -parser.add_option('--fixed-peak-width',dest='fixed_w',type='int',default=None, - help='use only a fixed peak window around the summit instead of whole peak') - -revcomp_map = string.maketrans('ACGT','TGCA') - -def score_sequence(seq,motif) : - ll_max = -sys.maxint - for i in range(len(seq)-len(motif)) : - # forward strand - ll_for_sum = 0 - subseq = seq[i:i+len(motif)].upper() - for n,pos in zip(subseq,motif.ll) : - ll_for_sum += pos[n] - # reverse strand - ll_rev_sum = 0 - subseq = reversed(subseq.translate(revcomp_map)) - for n,pos in zip(subseq,motif.ll) : - ll_rev_sum += pos[n] - ll_max = max(ll_max,ll_for_sum,ll_rev_sum) - - return ll_max - -illegal_fn_chars = '/;& ()' -fn_trans = string.maketrans(illegal_fn_chars,'_'*len(illegal_fn_chars)) - -def 
fasta_itr(fn) : - f = open(fn) - header = None - seq = None - for l in f : - if l.strip().startswith('>') : - if seq is not None : - yield (header,seq) - seq = None - header = l.strip() - else : - seq = seq+l.strip() if seq is not None else l.strip() - - # last record - yield (header, seq) - -if __name__ == '__main__' : - - opts, args = parser.parse_args(sys.argv[1:]) - - if len(args) != 3 : - parser.error('Exactly 3 non-option arguments must be provided') - - org, peaks_fn, motif_fn = args - - if not os.path.exists(opts.dir) : - os.mkdir(opts.dir) - - peaks_dt = np.dtype([('chr',np.str_,13),('start',np.int32),('end',np.int32),('pvalue',np.float64)]) - if opts.fixed_w is not None : - - all_peaks = np.array([(r['chr'], - r['start']+r['summit']-opts.fixed_w/2., - r['start']+r['summit']+opts.fixed_w/2., - r['-10*log10(pvalue)']) for r in MACSFile(peaks_fn)], - dtype=peaks_dt) - else : - all_peaks = np.array([(r['chr'], - r['start'], - r['end'], - r['-10*log10(pvalue)']) for r in MACSFile(peaks_fn)], - dtype=peaks_dt) - - # -10*log10(pvalue) -> -log10(pvalue) - all_peaks[:]['pvalue'] /= 10. 
- peak_pvals = all_peaks[:]['pvalue'] - - # find the sorted order of peaks by descending pvalue - peak_pval_inds = peak_pvals.argsort() - peak_pval_inds = peak_pval_inds[::-1] # ascending -> descending - all_peaks = all_peaks[peak_pval_inds,:] - - # for pvalue vs motif score - pval_num_bins = 20 - pval_bin_size = all_peaks[:]['pvalue'].size/pval_num_bins - # try to take at least 100 sequences, at most 10% of bin size - sample_percent = max(min(1.,100./pval_bin_size),0.1) - pval_bin_memo = {} - - if opts.top_n is not None : - peaks = all_peaks[0:opts.top_n] - peak_pvals = peak_pvals[peak_pval_inds][0:opts.top_n] - else : - peaks = all_peaks - - # extract fasta sequences for these peaks - nibDb = NibDB(nib_dirs=get_org_settings(org)['genome_dir']) - - """ - # get the peak sequences - sys.stderr.write('Getting peak sequences\n') - fasta_batch = [] - for i in range(peaks.size) : - fasta_batch.append((str(peaks[i]['chr']),int(peaks[i]['start']),int(peaks[i]['end']),'+')) - fg_fasta_headers, fg_fasta = nibDb.get_fasta_batch(fasta_batch) - - # need a dict for background sampling - # headers have genome_dir and .nib in them, strip that out - sys.stderr.write('Converting nib output to dict\n') - fg_fasta_headers = list(fg_fasta_headers) - fg_fasta_dict = {} - for h,s in zip(fg_fasta_headers,fg_fasta) : - h = h.replace('>'+get_org_settings(org)['genome_dir']+'/','') - h = h.replace('.nib','') - if len(s) > 150 : - fg_fasta_dict[h] = s - - # now sample the background sequences - sys.stderr.write('Sampling bg sequences (len(fg_fasta)==%d)\n'%(len(fg_fasta_dict))) - #bg_fasta_dict = rejection_sample_bg(fg_fasta_dict,org,bg_match_epsilon=1e-3,verbose=True) - bg_fasta_dict = {} - bg_fasta = bg_fasta_dict.values() - """ - - # load the motifs - sys.stderr.write('Movin right along\n') - motifs = load(motif_fn) - - if opts.motif_ind != 'all' : - motif_indices = [int(i) for i in opts.motif_ind.split(',') if len(i) != 0] - motifs = [motifs[i] for i in motif_indices] - else : - 
motif_indices = xrange(len(motifs)) - - # use all cores w/ a Pool - #pool = Pool(processes=opts.n_procs) - - # go through each motif - job_params = [] - res = [] - #for i,m in zip(motif_indices,motifs) : - # job_params.append((i,m,peak_pvals,fg_fasta,bg_fasta,opts.dir)) - #seq_scores = pool.map(analyze_motif_sequences,job_params) - - seq_scores = [] - for m_i,m in zip(motif_indices,motifs) : - - out_dir = opts.dir - - try : - m_name = m.source.split('\t')[2] - except : - m_name = m.source.split()[0] - - print 'starting',m_name - - # pvalue vs motif score - pval_bin_bounds = [] - pval_bin_pvals = [] - pval_bin_ranges = np.arange(0,all_peaks[:]['pvalue'].size,pval_bin_size) - for st_i in pval_bin_ranges : - - end_i = min(st_i+pval_bin_size,all_peaks[:]['pvalue'].size-1) - st_val = all_peaks[st_i]['pvalue'] - end_val = all_peaks[end_i]['pvalue'] - - #print st_i, end_i, pval_bin_size, st_val, end_val - - # keep track of the pvalue bounds of each bin - pval_bin_bounds.append((st_val,end_val)) - - # we sample sample_percent% of peaks in the bin to score - num_to_sample = int(sample_percent*(end_i-st_i)) - inds_to_sample = random.sample(xrange(st_i,end_i),num_to_sample) - - # we memoize the sequences we've seen before so we don't fetch seqs - # unnecessarily - unmemoed_inds_to_sample = set(inds_to_sample).difference(set(pval_bin_memo.keys())) - - bin_fasta_batch = [] - for peak_i in unmemoed_inds_to_sample : - bin_fasta_batch.append((str(all_peaks[peak_i]['chr']), - int(all_peaks[peak_i]['start']), - int(all_peaks[peak_i]['end']), - '+')) - - if len(bin_fasta_batch) != 0 : - bin_headers, bin_seq = nibDb.get_fasta_batch(bin_fasta_batch) - - for i, ind in enumerate(unmemoed_inds_to_sample) : - pval_bin_memo[ind] = bin_seq[i].upper() - - # score the sequences - pval_bin_pvals.append([]) - for ind in inds_to_sample : - max_score = m.bestscan(pval_bin_memo[ind]) - max_score = (max_score-m.minscore)/(m.maxscore-m.minscore) - pval_bin_pvals[-1].append(max_score) - 
pval_bin_pvals[-1] = np.array(pval_bin_pvals[-1]) - - - mp.figure(figsize=(4,4)) - font = {'size':'9'} - mp.rc('font',**font) - - # box plot of the bins - mp.boxplot(pval_bin_pvals,positions=np.arange(len(pval_bin_pvals))) - - # plot the means of the bins - #[(x[0]+x[1])/2. for x in pval_bin_bounds] - mp.plot(np.arange(len(pval_bin_pvals)), - [x.mean() for x in pval_bin_pvals],'bo') - mp.title('Sampled motif score vs binned peak pvalue') - mp.xlabel('Binned -log10(pvalue)') - mp.ylabel('Maximum normalized motif score') - - img_fn = os.path.join(out_dir,m_name.translate(fn_trans)+'_%d_peakmot.png'%m_i) - mp.savefig(img_fn) - mp.clf() - - continue - - fg_ratios = [] - for seq in fg_fasta : - #max_score = score_sequence(seq,m) - max_score = m.bestscan(seq.upper()) - fg_ratios.append((max_score-m.minscore)/(m.maxscore-m.minscore)) - fg_ratios = np.array(fg_ratios) - - bg_ratios = [] - for seq in bg_fasta : - #max_score = score_sequence(seq,m) - max_score = m.bestscan(seq.upper()) - bg_ratios.append((max_score-m.minscore)/(m.maxscore-m.minscore)) - bg_ratios = np.array(bg_ratios) - - fg_mean = sum(fg_ratios)/len(fg_ratios) - fg_std = np.std(fg_ratios) - bg_mean = sum(bg_ratios)/len(bg_ratios) - bg_std = np.std(bg_ratios) - - m_mat = np.array((fg_ratios,bg_ratios,peak_pvals)) - fg_score_sort_inds = m_mat[0,:].argsort() - - motif_score_cnts, motif_score_bins = np.histogram(m_mat[0,:],bins=20) - binned_motif_scores = [] - for st, end in zip(motif_score_bins[:-1],motif_score_bins[1:]) : - binned_motif_scores.append(m_mat[2,(m_mat[0,:]>=st)&(m_mat[0,:]<end)]) - - mp.figure(figsize=(4,4)) - font = {'size':'9'} - mp.rc('font',**font) - - mp.plot(fg_ratios,peak_pvals,'bo') - - # calculate pearson correlation coefficient - pear_r, pear_pval = pearsonr(fg_ratios,peak_pvals) - mp.title('Max motif strength vs peak pvalue\n(r=%.2f,pval=%.2g)'%(pear_r,pear_pval)) - img_fn = os.path.join(out_dir,m_name.translate(fn_trans)+'_%d_corr.png'%m_i) - mp.savefig(img_fn) - mp.clf() - - # line 
plot of average peak p-value for binned motif score - mp.title('Average peak p-value for binned motif score\n%s'%m_name) - mp.xlabel('normalized motif score') - mp.ylabel('-log10(pvalue)') - mp.boxplot(binned_motif_scores,positions=np.arange(motif_score_bins.size-1),sym='') - p = mp.plot(np.arange(motif_score_bins.size-1), - [x.mean() for x in binned_motif_scores], - 'bo', - label='Mean fg score') - p = p[0] - - # draw a crosshair - bg_median_ind = np.argwhere(((motif_score_bins<=bg_mean)[:-1] & (motif_score_bins>=bg_mean)[1:])).ravel()[0] - bg_median = np.median(binned_motif_scores[bg_median_ind]) - xlim, ylim = p.axes.get_xlim(), p.axes.get_ylim() - mp.plot([bg_median_ind,bg_median_ind],ylim,'k-',label='Mean bg score=%.2g'%m_mat[1,:].mean()) - mp.plot(xlim,[bg_median,bg_median],'k-') - mp.xticks(np.arange(motif_score_bins.size)[1::5],['%.2f'%x for x in motif_score_bins[1::5]]) - mp.legend(loc='upper left') - - img_fn = os.path.join(out_dir,m_name.translate(fn_trans)+'_%d_peakmot.png'%m_i) - mp.savefig(img_fn) - mp.clf() - - ret_d ={'m_name': m_name, - 'fg_mean': fg_mean, - 'fg_std': fg_std, - 'bg_mean': bg_mean, - 'bg_std': bg_std, - 'fg_scores': fg_ratios, - 'bg_scores': bg_ratios, - #'wmw_pval': WMWtest(fg_ratios,bg_ratios) - } - - # binned pvalue vs sampled motif score - - - print 'done with',m_name - - seq_scores.append(ret_d)
--- a/chipsequtil-master/scripts/nibFrag.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,128 +0,0 @@ -#!/usr/bin/env python -# nibFrag.py - a python implementation of Jim Kent's nibFrag command line utility - -import sys -import warnings -from optparse import OptionParser, OptionGroup - -from chipsequtil import get_file_parts, BEDFile -from chipsequtil.nib import get_nib_batch, validate_nib_file, NibException, NOMASK, MASK, HARDMASK - -usage = '%prog [options] file.nib start end strand [outfile]\n -- or --\n%prog [options] --batch file.nib batchfile [batchfile ...]' -description = """A python implementation of Jim Kent's nibFrag utility that allows outputting to \ -stdout. Otherwise the functionality is identical for the non-batch usage. Batch mode accepts \ -one or more files containing sets of coordinates to extract from the nib file. Only BED formatting \ -is accepted at the moment. All sequences are concatenated together in FASTA format. To retrieve the \ -entire sequence, use END as the end argument.""" -epilog="Note: When specifying --name optionin batch mode, also specify --dbHeader to ensure unique FASTA headers." 
-parser = OptionParser(usage=usage,description=description,epilog=epilog) -#parser.add_option('--output',dest='output',default=sys.stdout,help='filename to write output to [default: stdout]') -parser.add_option('--no-header',dest='no_header',action='store_true',help='only output sequence (no fasta header)') -parser.add_option('--wrap-width',dest='wrap_width',type='int',default=50,help='wrap output sequence at this number of bases, 0 indicates no wrap (sequence ends up on single line) [default: %default]') -parser.add_option('--batch',dest='batch',action='store_true',help='run in batch mode, interpret arguments after nib file as queries') -parser.add_option('--batch-format',dest='batch_format',type='choice',choices=['BED'],default='BED',help='format to interpret batch files [default: %default]') -#parser.add_option('--mask-type',dest='mask_type',type='choice',choices=['NOMASK','MASK','HARDMASK'],default='NOMASK',help='how to handle masked positions, correspond to original nibFrag options --masked and --hardMasked [default: %default]') - -# original nibFrag usage: -#nibFrag - Extract part of a nib file as .fa (all bases/gaps lower case by default) -#usage: -# nibFrag [options] file.nib start end strand out.fa -#where strand is + (plus) or m (minus) -#options: -# -masked - use lower case characters for bases meant to be masked out -# -hardMasked - use upper case for not masked-out and 'N' characters for masked-out bases -# -upper - use upper case characters for all bases -# -name=name Use given name after '>' in output sequence -# -dbHeader=db Add full database info to the header, with or without -name option -# -tbaHeader=db Format header for compatibility with tba, takes database name as argument - -# original nibFrag options -nibFrag_grp = OptionGroup(parser,"Original nibFrag options") -nibFrag_grp.add_option('--masked',dest='masked',action='store_true',help='use lower case characters for bases meant to be masked out') 
-nibFrag_grp.add_option('--hardMasked',dest='hardmasked',action='store_true',help='use upper case for non masked-out and \'N\' characters for masked-out bases') -nibFrag_grp.add_option('--upper',dest='upper',action='store_true',help='use upper case characters for all bases') -nibFrag_grp.add_option('--name',dest='name',default=None,help='Use given name after \'>\' in output sequence') -nibFrag_grp.add_option('--dbHeader',dest='dbHeader',default=None,help='Add full database info to the header, with or without -name option') -nibFrag_grp.add_option('--tbaHeader',dest='tbaHeader',default=None,help='Format header for compatibility with tba, takes database name as argument') -parser.add_option_group(nibFrag_grp) - - -if __name__ == '__main__' : - - opts, args = parser.parse_args(sys.argv[1:]) - - if len(args) < 1 : - parser.print_usage() - parser.exit(1) - - # setup - nib_path = args[0] - nib_dir,nib_fn,nib_base,nib_ext = get_file_parts(nib_path) - - queries = [] - if opts.batch : - - if len(args) < 2 : - parser.error('Two arguments must be supplied in batch mode') - - batch_fns = args[1:] - - for fn in batch_fns : - if opts.batch_format == 'BED' : - for bed in BEDFile(fn) : - if bed['chrom'] != nib_base : - warnings.warn('Chromosome in BED line %s does not match file %s, skipping'%(bed['chrom'],nib_base)) - else : - queries.append((int(bed['chromStart']),int(bed['chromEnd']),bed['strand'])) - else : - - if len(args) < 4 : - parser.error('Four arguments must be supplied in non-batch mode') - - # setup - strand = args[3] - start, end = int(args[1]),args[2] - if end == 'END' : - end = -1 - else : - end = int(end) - if end < start : - parser.error('Stop coordinate %d smaller than start %d'%(end,start)) - - queries.append((start,end,strand)) - - mask_type = NOMASK - if opts.masked : - mask_type = MASK - elif opts.hardmasked : - mask_type = HARDMASK - - # set the output file - if len(args) > 4 : - out_f = open(args[4],'w') - else : - out_f = sys.stdout - - # get the 
sequences from the .nib file - try : - headers, seqs = get_nib_batch(nib_path,queries,mask_type) - except NibException, e : - sys.stderr.write(e.message+'\n') - sys.exit(1) - - nbases = validate_nib_file(nib_path) - - # output all queries - for header, seq in zip(headers,seqs) : - - # write output - out_f.write(header) - - if opts.upper : - seq = seq.upper() - if opts.wrap_width == 0 : - out_f.write(seq+'\n') - else : - for i in xrange(0,len(seq),opts.wrap_width) : - out_f.write(seq[i:i+opts.wrap_width]+'\n') -
--- a/chipsequtil-master/scripts/org_settings.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,126 +0,0 @@ -#!/usr/bin/env python - -import os -import sys -from optparse import OptionParser -from ConfigParser import ConfigParser, NoSectionError -from pprint import pformat - -from chipsequtil import get_org_settings, get_global_settings, get_all_settings, get_local_settings, GLOBAL_SETTINGS_FN, LOCAL_SETTINGS_FN - -usage = '%prog [options] [<org key> [<org setting>]]' -description='''Tool for retrieving sets of organism-specific settings and paths. -Original paths are set at install time, and can be overridden in the file ~/.org -settings.cfg. Allows output of settings in a variety of shell environment -syntaxes. The tool attempts to guess which shell environment is being used by -examining the SHELL environment variable unless explicitly set. When run without -an argument, returns a listing of all settings available. -''' -parser = OptionParser(usage=usage,description=description) -parser.add_option('-s','--syntax',dest='syntax',type='choice',\ - choices=['auto','python','bash','tcsh'],default='auto',help='syntax flavor \ - of output to produce [default: %auto]') -parser.add_option('-l','--list',dest='list_sets',action='store_true',help='print \ - all available settings for human consumption') - - -def obj_to_format(obj,format='python') : - '''Convert *obj* into a string that can be evaluated in the environment \ - indicated in *format*. 
- - obj -- a string, a dict of values, or a dict of dicts of values - format -- python (default), or bash - ''' - - if format == 'auto' : - format = os.environ.get('SHELL','python').split('/')[-1] - - r = '' - if format == 'python' : - r = pformat(obj) - elif format in ['sh','bash','zsh','csh','tcsh'] : - statements = [] - if format in ['sh','bash','zsh'] : - export_tmpl = 'export %s=%s' - elif format in ['csh','tcsh'] : - export_tmpl = 'setenv %s %s' - - # dict - if isinstance(obj,dict) : - for k1, v1 in obj.items() : - # dict of dicts - if isinstance(v1,dict) : - # these should be literal values - for k2, v2 in v1.items() : - statements.append(export_tmpl%('_'.join([k1,k2]).upper(),\ - str(v2))) - else : - v1 = str(v1) - s = '\''+v1+'\'' if v1.count(' ') != 0 else str(v1) - statements.append(export_tmpl%(k1.upper(),str(s))) - else : - return str(obj) - - r = '\n'.join(statements) - - return r - - -if __name__ == '__main__' : - - opts, args = parser.parse_args(sys.argv[1:]) - - # output depends on number of arguments passed - output = '' - - # return everything we know about - if len(args) == 0 : - - if opts.list_sets : - - # always use python formatting when listing - opts.syntax = 'python' - - # global settings - settings = get_global_settings() - output = 'Global settings: (%s)\n'%GLOBAL_SETTINGS_FN - output += obj_to_format(settings,opts.syntax) + '\n' - - # local settings - settings = get_local_settings() - output += 'Local settings: (%s)\n'%LOCAL_SETTINGS_FN - output += obj_to_format(settings,opts.syntax) - else : - settings = get_all_settings() - output += obj_to_format(settings,opts.syntax) - - - # return all records from the specific organism - elif len(args) in (1,2) : - - # make sure our config files have the requested organism - try : - settings = get_org_settings(args[0]) - except NoSectionError : - sys.stderr.write('No entry %s found, available:\n'%args[0]+\ - pformat(get_all_settings().keys())+'\nExiting\n') - sys.exit(1) - - # return the requested 
field from the specific organism - if len(args) == 2 : - - # make sure the config file has the setting for this organism - try : - output = obj_to_format(settings[args[1]],opts.syntax) - except KeyError : - sys.stderr.write('Setting %s not found for %s, choices:\n'%(args[1],args[0])+ - pformat(settings.keys())+'\nExiting\n') - sys.exit(2) - else : - output = obj_to_format(settings,opts.syntax) - else : - parser.error('Provide zero, one, or two argments, found %s'%args) - - # bon voyage - sys.stdout.write(output+'\n') -
--- a/chipsequtil-master/scripts/peaks_to_fasta.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,144 +0,0 @@ -#!/usr/bin/env python - -import os -import sys -import textwrap -import warnings -from optparse import OptionParser - -from chipsequtil import BEDFile, MACSFile, get_file_parts, get_org_settings -from chipsequtil.nib import NibDB -from chipsequtil.sampling import rejection_sample_bg -from chipsequtil.util import MultiLineHelpFormatter -from chipsequtil.seq import write_fasta_to_file - - -usage='%prog [options] <organism> <peak file> [<peak file> ...]' -description='''Extract sequences for peaks in provided peak file(s). Can \ -interpret MACS or BED output, determined automatically by .xls or .bed extensions \ -respectively (force explicit format with --peak-format option). Outputs fasta \ -sequences for the peaks in all files extracted from the reference genome specified \ -by the output of *org_settings.py <organism> genome_dir* to stdout by default.\ -Chromosome names in peak files must match nib filenames without extension (e.g. \ -peak line: chr1 0 100 searches *genome_dir*/chr1.nib). 
Fasta records have the \ -following format: - -><chromosome>:<start>-<end>;fn=<name of file>:<line number>;db_fn=<db filename>;fmt=<format>;<source alignment info> -<sequence...> - -<db filename> is the filename where the sequence was extracted, <format> is the \ -format of the input file (MACS or BED), and <source alignment info> contains all \ -the fields from the originating alignment according to the source format.''' -parser = OptionParser(usage=usage,description=description,formatter=MultiLineHelpFormatter()) -parser.add_option('--min-header',dest='min_header',action='store_true',help='only store <chromosome>:<start>-<end> in header') -parser.add_option('--peak-format',dest='peak_format',type='choice', - choices=['auto','MACS','BED'],default='auto', - help='peak file format, \'auto\' determines format by extension, choices: MACS, BED, auto [default: %default]') -parser.add_option('--output',dest='output',default=None,help='filename to output fasta records to [default: stdout]') -parser.add_option('--fixed-peak-width',dest='fixed_peak_width',type='int',default=None,help='return a fixed number of bases flanking peak summit (*summit* field in MACS, (end-start)/2 in BED), ignoring start/stop coords [default: None]') -parser.add_option('--wrap-width',dest='wrap_width',type='int',default=70,help='wrap fasta sequences to specified width. 
-1 indicates no wrap [default: %default]') - - -def bed_to_fasta(fn,db,min_header=False) : - #headers,seqs = db.get_fasta_from_bed(fn) - fastas = [] - bed_recs = BEDFile(fn) - for i,rec in enumerate(bed_recs) : - - if opts.fixed_peak_width : - midpoint = (rec['chromEnd']-rec['chromStart'])/2 - start = max(0,midpoint-opts.fixed_peak_width/2) - end = min(midpoint+opts.fixed_peak_width/2,db.db_info[rec['chrom']]['nbases']) - coords = start, end - else : - coords = start,end = int(rec['chromStart']), int(rec['chromEnd']) - - seq = db.get_seq(rec['chrom'], start, end) - seq_fn = db.db_info[rec['chrom']]['path'] - - header = '%s:%s;'%(rec['chrom'],'%d-%d'%(start,end)) - if not min_header : - header = header.strip()+'%s:%d;fmt=BED;'%(fn,i)+ \ - ';'.join(['%s=%s'%(k,str(v)) for k,v in rec.items()]) - fastas.append((header,seq)) - - return fastas - - -def macs_to_fasta(fn,db,min_header=False) : - macs_recs = MACSFile(fn) - fasta = [] - for i,rec in enumerate(macs_recs) : - - if opts.fixed_peak_width : - # adjust start and end peak position based on summit, ensuring we don't step outside of the reference sequence bounds - start = max(0, rec['start']+rec['summit']-opts.fixed_peak_width/2) - end = min(rec['start']+rec['summit']+opts.fixed_peak_width/2, db.db_info[rec['chr']]['nbases']) - coords = start, end - else : - start, end = coords = rec['start'], rec['end'] - - seq = db.get_seq(rec['chr'],start,end) - seq_fn = db.db_info[rec['chr']]['path'] - - header = '%s:%s'%(rec['chr'],'%d-%d'%coords) - if not min_header : - header += ';%s:%d;db_fn=%s;fmt=MACS;'%(fn,i,seq_fn) + \ - ';'.join(['%s=%s'%(k,str(v)) for k,v in rec.items()]) - fasta.append((header,seq)) - - return fasta - - -if __name__ == '__main__' : - - opts, args = parser.parse_args(sys.argv[1:]) - - if len(args) < 2 : - parser.error('Must provide at least two non-option arguments') - - # instantiate the NibDB from the provided directory - organism = args[0] - nib_dir = get_org_settings(organism)['genome_dir'] - nib_db 
= NibDB(nib_dirs=[nib_dir]) - - # determine specified format - peak_fmt = opts.peak_format - - peak_fns = args[1:] - - # determine if there is an output file - if opts.output : - out_f = open(opts.output,'w') - else : - out_f = sys.stdout - - fasta_recs = [] - for peak_fn in peak_fns : - # if --peak-format is auto, figure format out from extension - if opts.peak_format == 'auto' : - fnbase, fnext = os.path.splitext(peak_fn) - if fnext.lower() == '.bed' : # BED file - peak_fmt = 'BED' - elif fnext.lower() == '.xls' : # MACS file - peak_fmt = 'MACS' - else : - warnings.warn('Peak format specified as auto but file extension \ - not recognized in file %s, skipping'%peak_fn) - continue - - if peak_fmt == 'BED' : - fasta_recs.extend(bed_to_fasta(peak_fn,nib_db,min_header=opts.min_header)) - elif peak_fmt == 'MACS' : - fasta_recs.extend(macs_to_fasta(peak_fn,nib_db,min_header=opts.min_header)) - - # write out foreground to file - if opts.output : - if opts.wrap_width == -1 : - opts.wrap_width = sys.maxint - write_fasta_to_file(dict(fasta_recs),opts.output,linelen=opts.wrap_width) - else : - for header, seq in fasta_recs : - if opts.wrap_width != -1 : - seq = textwrap.fill(seq,opts.wrap_width) - sys.stdout.write('>%s\n%s\n'%(header,seq))
--- a/chipsequtil-master/scripts/plot_peak_loc_dist.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,225 +0,0 @@ -#!/usr/bin/env python - -import matplotlib -matplotlib.use('AGG') - -import matplotlib.pyplot as mp -import numpy as np -import os -import sys - -from collections import defaultdict -from csv import reader, writer -from optparse import OptionParser -from StringIO import StringIO - -from chipsequtil import MACSFile, BEDFile - - -usage = '%prog [options] <peaks fn> <gene list fn>' -desc = """Produce a pie chart of the locations of peaks in different bins -(promoter, gene, exon, intron, etc.) and, optionally, save the different -records to their own files for subsequent analysis. Also produce a histogram -of distance from feature values in mapping file. Peaks file is expected -to be as output by MACS, or alternately as a BED file but then the -b plot -is not available. Gene list file is expected to be in the format as -output by peaks_to_known_genes.py script.""" -parser = OptionParser(usage=usage,description=desc) -parser.add_option('-b','--bar-fn',dest='bar_fn',default=None,help='filename for pvalue stacked bar chart') -parser.add_option('-g','--gene-pie-fn',dest='gene_pie_fn',default=None,help='filename for pie chart image') -parser.add_option('-p','--peak-pie-fn',dest='peak_pie_fn',default=None,help='filename for pie chart image') -parser.add_option('-f','--dist-fn',dest='dist_fn',default=None,help='filename for distance from feature image') -parser.add_option('-s','--save',dest='save',action='store_true',help='write out files containing peaks for each category') -parser.add_option('-d','--output-dir',dest='out_dir',default='.',help='output files created by --save option to this directory') -parser.add_option('--no-plot',dest='no_plot',action='store_true',help='dont show (but save) the figure produced') 
-parser.add_option('--peaks-format',dest='peak_fmt',type='choice',choices=['MACS','BED'],default='MACS',help='format of peaks file, either MACS or BED [default: MACS]') - -GENE_FIELD_NAMES = ['knowngene_id','gene_symbol'] -LOC_FIELD_NAMES = ['peak_loc','dist_from_feature','score','map_type','map_subtype'] -int_or_none = lambda x: int(x) if x != '' else None -float_or_none = lambda x: float(x) if x != '' else None -LOC_FIELD_TYPES = [int_or_none,float_or_none,float_or_none,str,str] - - -if __name__ == '__main__' : - - opts, args = parser.parse_args(sys.argv[1:]) - - if len(args) != 2 : - parser.error('Exactly 2 non-option argument is required') - - peaks_fn, gene_fn = args - - if opts.peak_fmt == 'BED' : - peaks_f = BEDFile(peaks_fn) - else : - peaks_f = MACSFile(peaks_fn) - - gene_reader = reader(open(gene_fn),delimiter='\t') - gene_recs, macs_recs, loc_recs = [], [], [] - gene_reader.next() # get rid of header - - gene_field_cnt = len(GENE_FIELD_NAMES) - macs_field_cnt = len(MACSFile.FIELD_NAMES) - loc_field_cnt = len(LOC_FIELD_NAMES) - for rec in gene_reader : - - gene_recs.append(dict(zip(GENE_FIELD_NAMES,rec[:gene_field_cnt]))) - - # this automatically coerces recs into correct format - macs_line = [f(x) for f,x in zip(MACSFile.FIELD_TYPES,rec[gene_field_cnt:gene_field_cnt+macs_field_cnt])] - macs_recs.append(dict(zip(MACSFile.FIELD_NAMES,macs_line))) - - loc_line = [f(x) for f,x in zip(LOC_FIELD_TYPES,rec[gene_field_cnt+macs_field_cnt:])] - loc_recs.append(dict(zip(LOC_FIELD_NAMES,loc_line))) - - loc_dist = defaultdict(int) - unique_peaks = defaultdict(set) - exon_scores, intron_scores = [], [] - dist_to_features = defaultdict(list) - pvals = defaultdict(list) - - fn_base, fn_ext = os.path.splitext(gene_fn) - if opts.save : - def get_writer(fn) : - fd = writer(open(fn,'w'),delimiter='\t') - header = MACSFile.FIELD_NAMES - if opts.peak_fmt == 'BED' : - header = BEDFile.FIELD_NAMES - fd.writerow(GENE_FIELD_NAMES+header+LOC_FIELD_NAMES) - return fd - fds = {} - - 
for gene, peak, loc in zip(gene_recs, macs_recs, loc_recs) : - # weird case, not sure why this happens - if loc['map_subtype'] == '0' : - loc['map_subtype'] = '' - key = loc['map_type']+'_%s'%loc['map_subtype'] if loc['map_subtype'] != '' else loc['map_type'] - loc_dist[key] += 1 - dist_to_features[key].append(int(loc['dist_from_feature'])) - if opts.peak_fmt == 'MACS' : - pvals[key].append(float(peak['-10*log10(pvalue)'])) - - map_key = '%s:%d-%d'%(peak['chr'],peak['start'],peak['end']) - unique_peaks[key].add(map_key) - - if key == 'gene_exon' : - exon_scores.append(loc['score']) - elif key == 'gene_intron' : - intron_scores.append(loc['score']) - - if opts.save : - row = [gene[f] for f in GENE_FIELD_NAMES] + \ - [peak[f] for f in MACSFile.FIELD_NAMES] + \ - [loc[f] for f in LOC_FIELD_NAMES] - if not fds.has_key(key) : - fn = os.path.join(opts.out_dir,fn_base+'_'+key+fn_ext) - fds[key] = get_writer(fn) - fds[key].writerow(row) - - # now find which peaks are intergenic - intergenic = [] - num_peaks = 0 - all_unique_peaks = reduce(lambda x,y: x.union(y), unique_peaks.values()) - for l in peaks_f : - peak = l - map_key = '%s:%d-%d'%(peak['chr'],peak['start'],peak['end']) - if map_key not in all_unique_peaks : - unique_peaks['intergenic'].add(map_key) - intergenic.append(peak) - if opts.peak_fmt == 'MACS' : - pvals['intergenic'].append(peak['-10*log10(pvalue)']) - num_peaks += 1 - - num_int = len(intergenic) - loc_dist['intergenic'] = num_int - if opts.save : - fn = os.path.join(opts.out_dir,fn_base+'_intergenic.xls') - fd = writer(open(fn,'w'),delimiter='\t') - fd.writerow(MACSFile.FIELD_NAMES) - fd.writerows([[x[f] for f in MACSFile.FIELD_NAMES] for x in intergenic]) - - exon_scores, intron_scores = np.array(exon_scores), np.array(intron_scores) - - font = {'size':'9'} - mp.rc('font',**font) - fig = mp.figure(figsize=(4,4)) - - bin_order = ('intergenic','gene_exon','gene_intron','promoter','after') - colors = 'bgrcm' - - # pie chart - #pie_ax_rect = [0.1,0.35, 
0.4125, 0.525 ] # left, bottom, width, height - pie_ax = fig.add_axes((0.15,0.15,0.7,0.7)) - pie_ax.set_title('Gene map distribution\n%d peaks'%num_peaks) - pie_labels, pie_values = [], [] - for k in bin_order : - pie_labels.append(k+'\n%d'%(len(unique_peaks[k]))) - pie_values.append(len(unique_peaks[k])) - pie_ax.pie(pie_values,labels=pie_labels) - - img_fn = fn_base+'_gene_loc.png' if opts.gene_pie_fn is None else opts.gene_pie_fn - mp.savefig(img_fn) - mp.clf() - - - fig = mp.figure(figsize=(4,4)) - pie_ax = fig.add_axes((0.15,0.15,0.7,0.7)) - pie_ax.set_title('Peak map distribution\n%d peaks'%num_peaks) - pie_labels, pie_values = [], [] - for k in bin_order : - pie_labels.append(k+'\n%d'%(loc_dist[k])) - pie_values.append(loc_dist[k]) - pie_ax.pie(pie_values,labels=pie_labels) - - img_fn = fn_base+'_peak_loc.png' if opts.peak_pie_fn is None else opts.peak_pie_fn - mp.savefig(img_fn) - mp.clf() - - fig = mp.figure(figsize=(4,4)) - # dist to feature histogram - #hist_ax_rect = [0.65,0.45,0.25,0.45] - hist_ax = fig.add_axes((0.15,0.15,0.7,0.7)) - hist_ax.set_title('Peak distance from TSS') - # join all the lists together - dists = sum(dist_to_features.values(),[]) - pdf, bins, patches = hist_ax.hist(dists,bins=20) - #h = mp.hist(dists,bins=20) - hist_ax.set_xlim((int(min(dists)),int(max(dists)))) - - dist_fn = fn_base+'_dist.png' if opts.dist_fn is None else opts.dist_fn - mp.savefig(dist_fn) - mp.clf() - - if opts.peak_fmt == 'MACS' : - fig = mp.figure(figsize=(4,4)) - bar_ax = fig.add_axes((0.15,0.15,0.7,0.7)) - pval_hists = {} - min_pval, max_pval = min([min(v) for v in pvals.values()]), max([max(v) for v in pvals.values()]) - for key,pvals in pvals.items() : - vals, bins = np.histogram(pvals,range=(0,max_pval),bins=20) - lv = np.log10(vals) - lv[np.isneginf(lv)] = 0.1 - pval_hists[key] = lv - - pval_items = [(k,pval_hists[k]) for k in bin_order if pval_hists.has_key(k)] - bar_width = 0.85*(max_pval-min_pval)/(len(bins)-1) - print max_pval, min_pval, len(bins) 
- print 'bar_width:',bar_width - bars = [] - b = bar_ax.bar(bins[:-1],pval_items[0][1],width=bar_width,color=colors[0]) - bars.append(b) - - sum_bottoms = pval_items[0][1] - for i, (key, pvals) in enumerate(pval_items[1:]) : - b = bar_ax.bar(bins[:-1],pvals,bottom=sum_bottoms,width=bar_width,color=colors[i+1]) - bars.append(b) - sum_bottoms += pvals - bar_ax.legend([b[0] for b in bars],[x[0] for x in pval_items]) - bar_ax.axis((-10,max(bins),0,max(sum_bottoms))) - bar_ax.set_title('Peak map distribution by pvalue') - bar_ax.set_xlabel('-10*log10(pvalue)') - bar_ax.set_ylabel('relative log10(# peaks)') - - pval_fn = fn_base+'_pval_bar.png' if opts.bar_fn is None else opts.bar_fn - mp.savefig(pval_fn)
--- a/chipsequtil-master/scripts/plot_pos_vs_neg_peaks.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,103 +0,0 @@ -#!/usr/bin/env python - -import os -import sys - -import matplotlib -matplotlib.use('AGG') - -from matplotlib.pyplot import * -from numpy import arange, log10 -from optparse import OptionParser - -from chipsequtil import MACSFile - -usage = '%prog [options] <pos peaks fn> <neg peaks fn>' -parser = OptionParser(usage=usage) -parser.add_option('-o','--output',dest='out_fn',default=None,help='filename of output image') - -if __name__ == '__main__' : - - opts, args = parser.parse_args(sys.argv[1:]) - pos_fn, neg_fn = args - - pos_f, neg_f = MACSFile(pos_fn), MACSFile(neg_fn) - - pos_peaks = [] - pos_pvals = [] - for pk in pos_f : - pos_pvals.append(float(pk['-10*log10(pvalue)'])/10.) - pos_peaks.append((pk['-10*log10(pvalue)'],pk)) - - pos_peaks.sort() - - neg_peaks = [] - neg_pvals = [] - for pk in neg_f : - neg_pvals.append(float(pk['-10*log10(pvalue)'])/10.) - neg_peaks.append((pk['-10*log10(pvalue)'],pk)) - - neg_peaks.sort() - - min_pval, max_pval = min(pos_pvals+neg_pvals), max(pos_pvals+neg_pvals) - - pval_rng = arange(min_pval,max_pval,(max_pval-min_pval)/100.) 
- - # construct cdfs - pos_cdf, neg_cdf = [], [] - for pval in pval_rng : - pos_cdf.append(len(filter(lambda x: x >= pval,pos_pvals))) - neg_cdf.append(len(filter(lambda x: x >= pval,neg_pvals))) - - # normalize cdfs - pos_cdf_norm = [1.*x/max(pos_cdf) for x in pos_cdf] - neg_cdf_norm = [1.*x/max(neg_cdf) for x in neg_cdf] - - # log of pvals - pos_logs = map(log10,pos_cdf) - neg_logs = map(log10,neg_cdf) - plot(pval_rng,pos_logs) - plot(pval_rng,neg_logs) - ytics, ylabs = yticks() - clf() - - # normalize logs for plotting - pos_logs_norm = [1.-x/max(pos_logs) for x in pos_logs] - neg_logs_norm = [1.-x/max(neg_logs) for x in neg_logs] - - # calculate pos proportion for each pvalue - pos_ratio = [] - pos_only = [] - for pos, neg in zip(pos_cdf,neg_cdf) : - #pos_ratio.append(pos/(pos+neg)) - if neg == 0 : - pos_only.append(pos_ratio[-1]) - #pos_ratio.append(pos_ratio[-1]) - else : - pos_ratio.append(pos/neg) - - subplot(211) - plot(pval_rng, pos_logs, 'b-') - plot(pval_rng, neg_logs, 'g-') - yticks(ytics,[int(10**y) for y in ytics]) - title('positive vs. negative peaks') - legend(('positive','negative'),loc='upper right') - xlabel('-log(p-value)') - ylabel('# Peaks') - axis('tight') - - subplot(212) - plot(pval_rng[:len(pos_ratio)], map(log10,pos_ratio), 'k-') - plot(pval_rng[len(pos_ratio):], map(log10,pos_only),'k--') - #plot(pval_rng,pos_ratio, 'k-') - axis('tight') - xlabel('-log(p-value)') - #ylabel('# pos / (# pos + # neg)') - ylabel('log10(# pos / # neg)') - - if opts.out_fn is None : - pos_base_fn, pos_fn_ext = os.path.splitext(pos_fn) - out_fn = '%s_pos_v_neg.png'%pos_base_fn - else : - out_fn = opts.out_fn - savefig(out_fn)
--- a/chipsequtil-master/scripts/probeset_to_known_gene.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,124 +0,0 @@ -#!/usr/bin/env python - -import gzip -import sys -from collections import defaultdict as dd -from csv import DictReader, DictWriter -from optparse import OptionParser -from sqlite3 import connect - -from chipsequtil import KnownGeneFile - -# TODO make these parameters? -#affy_anno_fn = 'Mouse430A_2.na30.annot.csv' - -usage = '%prog [options] <knownGene annotation> <knownToMOE430 file> <knownGene Xref file> <microarray data file>' -description = 'Maps probset data to knownGene database provided by UCSC. Probesets \ -that map to multiple knownGenes have one record per knownGene with duplicate data \ -otherwise. Output is knownGene id prepended to each record in microarray data file.' -parser = OptionParser(usage=usage,description=description) -parser.add_option('--output',dest='output',default=None,help='file to output mapping to [default: stdout]') -#parser.add_option('--symbol-xref',dest='symbol_xref',default=None,help='use the provided kgXref file to output gene symbols as second column') - -if __name__ == '__main__' : - - opts, args = parser.parse_args(sys.argv[1:]) - - #affy_bioc_fn = 'microarray_analysis/cbfb_vector_BH_all.txt' - #knownToMOE_sql_fn = 'knownToMOE430.sql' - #knownToMOE_data_fn = 'knownToMOE430.txt' - - if len(args) < 3 : - parser.error('Incorrect number of arguments provided') - - known_gene_fn = args[0] - knownToMOE_data_fn = args[1] - Xref_fn = args[2] - affy_bioc_fn = args[3] - - # affymetrix file from bioconductor - affy_bioc_f = open(affy_bioc_fn) - affy_bioc = {} - affy_bioc_reader = DictReader(affy_bioc_f,delimiter="\t") - for row in affy_bioc_reader : - affy_bioc[row['ID']] = row - - # knownGene annotation - kg = KnownGeneFile(known_gene_fn) - kg_ids = dict([(x['name'],x) for x in kg]) - - # affy to knownGene - affy_to_kg_map = dd(list) - affy_to_kg_fields = ['kgID','affyID'] - 
affy_to_kg_f = open(knownToMOE_data_fn) - kg_to_affy_map = dd(list) - for row in DictReader(affy_to_kg_f,fieldnames=affy_to_kg_fields,delimiter="\t") : - affy_to_kg_map[row['affyID'][2:]].append(row['kgID']) - kg_to_affy_map[row['kgID']].append(row['affyID'][2:]) - - if opts.output : - out_f = open(opts.output,'w') - else : - out_f = sys.stdout - - out_header = ['knownGeneID']+affy_bioc_reader.fieldnames - - # see if the user wants gene symbols too - opts.symbol_xref = Xref_fn - if opts.symbol_xref : - kgXref_fieldnames = ['kgID','mRNA','spID','spDisplayID','geneSymbol','refseq','protAcc','description'] - symbol_xref_reader = DictReader(open(opts.symbol_xref),fieldnames=kgXref_fieldnames,delimiter='\t') - symbol_xref_map = {} - for rec in symbol_xref_reader : - symbol_xref_map[rec['kgID']] = rec - out_header = ['knownGeneID','geneSymbol']+affy_bioc_reader.fieldnames - - out_writer = DictWriter(out_f,delimiter='\t',fieldnames=out_header,lineterminator='\n') - out_writer.writerow(dict(zip(out_header,out_header))) - for probesetID, data in affy_bioc.items() : - kg_ids = affy_to_kg_map[probesetID] - for kg_id in kg_ids : - out_l = {'knownGeneID':kg_id} - if opts.symbol_xref : - out_l['geneSymbol'] = symbol_xref_map[kg_id]['geneSymbol'] - out_l.update(data) - out_writer.writerow(out_l) - - # figure out if any probsets map to non-overlapping loci - # dirty dirty dirty dirty - if False : - affy_id_loci = {} - for affyID, kgIDs in affy_to_kg_map.items() : - # check all pairwise kgIDs to make sure they all overlap in transcription start sites - kg_id_loci = dd(list) - for i, kgID1 in enumerate(kgIDs) : - kgID1_rec = kg_ids[kgID1] - kg_id_loci[kgID1].append(kgID1_rec) - for j, kgID2 in enumerate(kgIDs) : - kgID2_rec = kg_ids[kgID2] - # these are all gene overlap conditions - #kg1Start = kgID1_rec['txEnd'] if kgID1_rec['strand'] == '-' else kgID1_rec['txStart'] - #kg1End = kgID1_rec['txStart'] if kgID1_rec['strand'] == '-' else kgID1_rec['txEnd'] - #kg2Start = 
kgID2_rec['txEnd'] if kgID2_rec['strand'] == '-' else kgID2_rec['txStart'] - #kg2End = kgID2_rec['txStart'] if kgID2_rec['strand'] == '-' else kgID2_rec['txEnd'] - kg1Start, kg1End = kgID1_rec['txStart'], kgID1_rec['txEnd'] - kg2Start, kg2End = kgID2_rec['txStart'], kgID2_rec['txEnd'] - if (kg2Start <= kg1Start <= kg2End or \ - kg1Start <= kg2Start <= kg1End or \ - (kg2Start < kg1Start and kg2End > kg1End) or \ - (kg1Start < kg2Start and kg1End > kg2End)) and \ - kgID1_rec['chrom'] == kgID2_rec['chrom'] and \ - i != j : - # we have overlap - pass - elif i != j : - # doesn't overlap oh noes - kg_id_loci[kgID1].append(kgID2_rec) - for kg_id, kg_recs in kg_id_loci.items() : - if len(kg_recs) != 1 : - affy_id_loci[affyID] = (kg_id, len(kg_recs),len(kgIDs),kg_recs,kgIDs) - - if len(affy_id_loci) != 0 : - sys.stderr.write('Warning: %d probeset ids map to non-overlapping loci'%len(affy_id_loci)) - -
--- a/chipsequtil-master/scripts/rejection_sample_fasta.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,78 +0,0 @@ -#!/usr/bin/env python - -import sys - -from optparse import OptionParser - -from chipsequtil import check_org_settings -from chipsequtil.util import MultiLineHelpFormatter -from chipsequtil.sampling import rejection_sample_bg -from chipsequtil.seq import fasta_to_dict, write_fasta_to_file - -usage = '%prog [options] <organism> <fasta file> [<fasta file> ... ]' -description = """Use rejection sampling to generate a set of background/random \ -sequences matching the distance to nearest transcription start site, sequence \ -length, and GC content distributions of the input fasta file(s). Generated \ -sequences are genomic sequences sampled based on these distributions. All sequences \ -from all files are used to generate the background sequences. The following \ -command must output a path to a nib genomic sequence directory and refGene \ -annotation, respectively : - -$> org_settings.py <organism> genome_dir -$> org_settings.py <organism> refgene_anno_path - -Utility prints out generated fasta records to stdout by default. Input sequences \ -from chr20 are mapped to chrX, chr21 are mapped to chrY, and sequences from chrM \ -are not used. -""" -epilog = "Note: script only considers sequences with unique header names, only the last record of those with identical header names is used" -parser = OptionParser(usage=usage,description=description,formatter=MultiLineHelpFormatter()) -parser.add_option('-n','--num-seqs',dest='num_seqs',default='1x', help='number of sequences to generate, either absolute number or factor of # input sequences, e.g. 
2.5x for 2.5 times the # of input sequences [default: 1x]') -parser.add_option('--output',dest='output',default=None,help='file to output fasta records to [default: stdout]') -parser.add_option('--bed',dest='bed',action='store_true', help='also produce a BED formatted file representing sampled sequences') -parser.add_option('--bed-output',dest='bed_output',default='output.bed',help='with --bed, file to output BED records to [default: %default]') -parser.add_option('-v','--verbose',dest='verbose',action='store_true',help='print out debug information') - -if __name__ == '__main__' : - - opts, args = parser.parse_args(sys.argv[1:]) - - if len(args) < 2 : - parser.error('Must be 2 non-option arguments') - - organism, fasta_fns = args[0], args[1:] - - reqd_settings = ['genome_dir','refgene_anno_path'] - if not check_org_settings(organism,reqd_settings) : - parser.error('The <organism> settings set must contain paths for %s'%reqd_settings) - - # load up all the fasta records - fasta_recs = {} - for fasta_fn in fasta_fns : - fasta = fasta_to_dict(fasta_fn) - fasta_recs.update(fasta) - - # parse --num-seqs argument - if opts.num_seqs.endswith('x') : - num_seq_factor = float(opts.num_seqs[:-1]) - num_seqs = int(len(fasta_recs)*num_seq_factor) - else : - try : - num_seqs = int(opts.num_seqs) - except TypeError : - parser.error("Incorrect format of --num-seqs argument, must either be an integer or a factor ending with x, e.g. 2.5x") - - # generate the sequences - gen_seqs = rejection_sample_bg(fasta_recs,organism,num_samples=num_seqs,verbose=opts.verbose) - - # write out to file - if opts.output : - write_fasta_to_file(gen_seqs,opts.output) - else : - sys.stdout.write(''.join(['>%s\n%s\n'%(k,v) for k,v in gen_seqs.items()])) - - if opts.bed : - bed_f = open(opts.bed_output,'w') - bed_f.write(''.join([k.replace(':','\t').replace('-','\t')+'\n' for k in gen_seqs.keys()])) - bed_f.close() -
--- a/chipsequtil-master/scripts/sort_bed.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,44 +0,0 @@ -#!/usr/bin/env python -import sys, os -from optparse import OptionParser -from collections import defaultdict as dd -from csv import reader, writer - - -usage = "%prog [options] <BED file> [<BED file> <BED file>...]" -description = """\ -Sort the BED formatted files first by chromosome (field 1) and then by start -coordinate (field 2). Lines from all files submitted are concatenated and -sorted in the final output.""" -parser = OptionParser(usage=usage,description=description) -parser.add_option('--output',dest='output',default=sys.stdout,help='filename to write the sorted BED lines [default: stdout]') - -if __name__ == '__main__' : - - opts, args = parser.parse_args(sys.argv[1:]) - - if len(args) == 0 : - parser.error("Must provide at least one file") - - fns = args - chromos = dd(list) - - # load each chromosome separately - for fn in fns : - bed_reader = reader(open(fn),delimiter='\t') - for line in bed_reader : - chromos[line[0]].append(line) - - # determine where we're writing to - if opts.output != sys.stdout : - f = open(opts.output,'w') - else : - f = opts.output - - # write the chromos in lexicographic sorted order - bed_writer = writer(f,delimiter='\t') - for k in sorted(chromos.keys()) : - - # sort each chromosome's BED lines by stat position - chromos[k].sort(key=lambda x: int(line[1])) - bed_writer.writerows(chromos[k])
--- a/chipsequtil-master/scripts/split_file.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,83 +0,0 @@ -#!/usr/bin/env python - -from optparse import OptionParser -from datetime import datetime -from subprocess import Popen, PIPE -import itertools -import sys, os, getpass, re - -usage = "[%prog] [options] filename" -description = """\ -Split <filename> into a set of files with either a specific number of lines -(--split-type=lines, default) or into a specific number of files (--split-type= -count). Files are created with .XXXX appended, indicating the number of file -split. Writes files to current working directory unless otherwise specified. -""" - -parser = OptionParser(usage=usage,description=description) -parser.add_option('--type',dest='split_type',type='choice',choices=['lines','count'],default='lines',help='how to split the file (WARNING: count does not preserve the sequence of lines in the original file when splitting) [default: %default]') -#parser.add_option('--split-arg',dest='split_arg',default='1000',help='integer argument for split type (size specified as Xb, XK, XM, or XG, others are integers) [default: %default]') -parser.add_option('--arg',dest='split_arg',type='int',default=1000,help='integer argument for split type [default: %default]') -parser.add_option('--outdir',dest='outdir',default='.',help='directory to put the split files in [default: %default]') - -def get_file_parts(fn) : - fpath,fname = os.path.split(fn) - fbase,fext = os.path.splitext(fname) - return fpath,fname,fbase,fext - -if __name__ == '__main__' : - - opts, args = parser.parse_args(sys.argv[1:]) - - if len(args) < 1 : - parser.print_usage() - sys.exit(1) - - filename = args[0] - abs_filename = os.path.abspath(filename) - - # check to ensure filename exists - if not os.path.exists(abs_filename) : - sys.stderr.write('File %s does not exist, exiting\n'%abs_filename) - parser.print_usage() - sys.exit(2) - - # split the file - split_size = 
opts.split_arg - fpath,fname,fbase,fext = get_file_parts(abs_filename) - if opts.split_type == 'lines' : - curr_split = 0 # for first condition - split_fd = None - for i,l in enumerate(open(abs_filename)) : - if i%split_size == 0 : - if split_fd : split_fd.close() # close it if we aren't on the first split - split_fd = open(os.path.join(opts.outdir,fname)+'.%04d'%curr_split,'w') - curr_split += 1 - split_fd.write(l) - nlines = i - elif opts.split_type == 'count' : - # create split_size split files by writing lines round robin - split_fds = [open(os.path.join(opts.outdir,fname)+'.%04d'%x,'w') for x in range(split_size)] - split_cycle = itertools.cycle(split_fds) - for i,l in enumerate(open(abs_filename)) : - split_cycle.next().write(l) - nlines = i - - # close all the handles - [fd.close() for fd in split_fds] - - elif opts.split_type == 'size' : - # parse split_arg argument, into integer if split_type is 'size' - if opts.split_type == 'size' : - m = re.match('^(\d+)([bKMG])$',opts.split_arg) - if m is None : - sys.stderr.write("Incorrect --split-arg argument for --split-type=size, I understand only X[bKMG], exiting\n") - parser.print_usage() - sys.exit(3) - else : - size_d = {'b':1,'K':1024,'M':pow(1024,2),'G':pow(1024,3)} - split_size = int(m.groups()[0])*size_d[m.groups()[1]] - - fd = open(abs_filename) - curr_split_size = 0 -
--- a/chipsequtil-master/scripts/split_qsub.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,73 +0,0 @@ -#!/usr/bin/env python - -from __future__ import with_statement -import os -import sys -from optparse import OptionParser -from subprocess import Popen, PIPE - -from chipsequtil import get_file_parts - -usage = "[%prog] [options] <utility> <file> [<file> <file> ...]" -description = """\ -Submit a job using qsub for <utility>, each with one <file> as an argument. Any -options specified on the command line that [%prog] cannot interpret are passed -on to the utility for each call.""" -epilog = "Note: this script only works in Unix-style environments" -parser = OptionParser(usage=usage,description=description,epilog=epilog) -parser.add_option('--suffix',dest='suffix',default=None,help='string to append to stdout files, e.g. <filename>_<--suffix>.<--ext> [default: <utility>]') -parser.add_option('--ext',dest='ext',default='.out',help='file extension to use for stdout files') -parser.add_option('--util-args',dest='util_args',default='',help='double quote wrapped arguments to pass to <utility>') -parser.add_option('--keep-stderr',dest='keep_stderr',action='store_true',help='capture stderr files, useful for debugging') -parser.add_option('--keep-scripts',dest='keep_scripts',action='store_true',help='do not delete qsub scripts generated after job submission') -parser.add_option('--die-on-error',dest='die_on_err',action='store_true',help='if any one of the qsub submissions returns non-zero exit status, stop executing') - - -if __name__ == '__main__' : - - opts, args = parser.parse_args(sys.argv[1:]) - - utility, filenames = args[0], args[1:] - - # try to find the utility - abs_utility = os.path.abspath(utility) - if not os.path.exists(abs_utility) : - # look on the path - abs_utility = Popen('which %s'%utility,shell=True,stdout=PIPE,stderr=PIPE).communicate()[0].strip() - if not os.path.exists(abs_utility) : - raise Exception("Utility %s 
could not be found in the local directory or on the user's path, exiting"%utility) - sys.exit(1) - - upath,uname,ubase,uext = get_file_parts(abs_utility) - - runscript_tmpl = """ -#!/bin/bash - -#$ -N %(jobname)s -#$ -S /bin/sh -#$ -o %(stdout)s -#$ -e %(stderr)s -#$ -cwd -export PYTHONPATH=%(pythonpath)s:${PYTHONPATH} - -%(utility)s %(utilargs)s %(filename)s""" - - suffix = ubase if opts.suffix is None else opts.suffix - for fn in filenames : - abs_fn = os.path.abspath(fn) - fpath,fname,fbase,fext = get_file_parts(abs_fn) - stdout = os.path.join(fpath,fname+'_'+suffix+opts.ext) - stderr = '/dev/null' if not opts.keep_stderr else os.path.join(fpath,fname+'_'+suffix+'.err') - call_script = runscript_tmpl%{'jobname':fname,'utility':abs_utility,'filename':abs_fn,'stdout':stdout,'stderr':stderr,'utilargs':opts.util_args,'pythonpath':os.environ.get('PYTHONPATH','')} - f = open('%s'%abs_fn+'_'+utility+'.script','w') - f.write(call_script) - f.close() - p = Popen('qsub %s'%f.name,shell=True) - p.wait() - if not opts.keep_scripts : - os.remove(f.name) - - if opts.die_on_err and p.returncode != 0 : - with open(stderr,'w') as f : - f.write('qsub returned non-zero exit code for file %s, aborting\n'%fn) - sys.exit(1)
--- a/chipsequtil-master/scripts/wait_for_jobid.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,65 +0,0 @@ -#!/usr/bin/env python - -import re -import sys -import time - -from optparse import OptionParser -from subprocess import Popen, PIPE - -usage = '%prog [options] <job id> [<job id>...]' -desc = 'Poll qstat and wait until all <job id>s are finished' -parser = OptionParser(usage=usage,description=desc) - -array_job_match = '^(\d+)\[\]\.(.*)' -array_job_regex = '^%s\[[0-9]\+\]' - -def is_job_done(jobid) : - - done = False - - # have to handle array jobs differently than standalone - array_match = re.search(array_job_match,jobid) - if array_match is not None : - idnum, rest = array_match.groups() - jobid_regex = array_job_regex%idnum - qstat_p = Popen('qstat -t | grep "%s" | cut -f 1 -d " "'%jobid_regex,shell=True,stdout=PIPE) - stdout, stderr = qstat_p.communicate() - done = len(stdout) == 0 - - else : - # -j is only for SGE - qstat_p = Popen('qstat -j %s'%jobid,shell=True,stdout=PIPE,stderr=PIPE) - qstat_p.wait() - if qstat_p.returncode == 0 : - pass - # assume any != 0 return code means job is done - else : - done = True - - return done - -if __name__=='__main__': - - opts, args = parser.parse_args(sys.argv[1:]) - - jobids = map(lambda x: x.strip(), args) - - # wait for all of them - sys.stderr.write('Waiting for jobs to complete\n') - jobs_done = [False]*len(jobids) - try : - while not all(jobs_done) : - jobs_not_done = filter(lambda x: not x[1], enumerate(jobs_done)) - for i, jid in jobs_not_done : - jobs_done[i] = is_job_done(jobids[i]) - sys.stderr.write('Jobs done: %d/%d\r'%(sum(jobs_done),len(jobs_done))) - time.sleep(2) - sys.stderr.flush() - except KeyboardInterrupt : - sys.stderr.write('\n') - resp = raw_input('Caught keyboard interrupt, kill all jobs? [y/N] ') - if resp.lower() == 'y' : - Popen('kill_all_jobs.sh',shell=True) - - sys.stderr.write('done\n')
--- a/chipsequtil-master/scripts/wait_for_qsub.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,14 +0,0 @@ -#!/usr/bin/env python -import time -from subprocess import Popen, PIPE - -if __name__ == '__main__' : - - # this is gross, but it works when you need to stall a pipeline until all your jobs are done - done = False - while not done : - qstat_output = Popen('qstat',shell=True,stdout=PIPE).communicate()[0] - if qstat_output == '' : - done = True - else : - time.sleep(1)
--- a/chipsequtil-master/scripts/wqsub.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,145 +0,0 @@ -#!/usr/bin/env python - -from __future__ import with_statement -import os -import re -import sys -import time -from optparse import OptionParser -from subprocess import Popen, PIPE - -from chipsequtil import get_file_parts - -usage = "[%prog] [options] command" -description = """Wrap the specified command into a qsub script and submit it -for execution. Script captures both stdout and stderr to the current directory. -By default, all of the user's environment variables are put into the script -(compatible with SGE only ATM).""" -epilog = "Note: this script only works in Unix-style environments." -parser = OptionParser(usage=usage,description=description,epilog=epilog) -parser.add_option('--wqsub-name',dest='wqsub_name',default='wqsub',help='job name to submit as <--wqsub-name>_<first non-whitespace chars in command> [default: %default]') -parser.add_option('--wqsub-ext',dest='wqsub_ext',default='.out',help='file extension to use for stdout files') -parser.add_option('--wqsub-keep-script',dest='wqsub_keep_script',action='store_true',help='do not delete qsub script generated after job submission') -parser.add_option('--wqsub-no-env',dest='wqsub_no_env',action='store_true',help='do not include any local environment variables in the script') -parser.add_option('--wqsub-no-submit',dest='wqsub_no_sub',action='store_true',help='create script but do not submit job (useful for generating scripts)') -parser.add_option('--wqsub-drm',dest='drm',default='SGE',type='choice',choices=['SGE','TORQUE'],help='the DRM to generate scripts for [default: %default]') -parser.add_option('--wqsub-drm-arg',dest='drm_args',action='append',default=[],help='arguments to pass as parameters in the job script specific to the DRM, use multiple option flags to specify multiple parameters') -parser.add_option('--wqsub-wait',dest='wait',action='store_true',help='poll 
the DRM and do not return control until job is finished (only works for TORQUE)') - -templates = { -'TORQUE': """\ -#!/bin/bash - -#PBS -N %(jobname)s -#PBS -o %(stdout)s -#PBS -e %(stderr)s -#PBS -d %(cwd)s -%(env)s -%(addnl)s - -%(command)s -""", -'SGE':"""\ -#!/bin/bash - -#$ -N %(jobname)s -#$ -S /bin/bash -#$ -o %(stdout)s -#$ -e %(stderr)s -#$ -cwd -%(env)s -%(addnl)s - -%(command)s -""" -} - -drm_symb = { -'TORQUE': 'PBS', -'SGE': '$' -} - -if __name__ == '__main__' : - - # get the wqsub args out first - wqsub_args = [] - other_args = [] - for arg in sys.argv : - if arg.count('wqsub') != 0 or arg in ['-h','--help'] : - wqsub_args.append(arg) - else : - other_args.append(arg) - - opts, args = parser.parse_args(wqsub_args) - - if len(other_args) == 0 : - parser.error('Must provide a command') - - command = ' '.join(other_args) - runscript_tmpl = templates[opts.drm] - # set up job parameters - cmd_exe = os.path.basename(other_args[0]) - jobname = opts.wqsub_name+'_'+cmd_exe - stdout_fn = jobname+opts.wqsub_ext - stdout = os.path.abspath(stdout_fn) - fpath,fname,fbase,fext = get_file_parts(stdout) - stderr = os.path.abspath(os.path.join(jobname+'.err')) - - # get the user's current environment and put it into the execute script - if opts.wqsub_no_env : - env_str = '# local environment variables omitted' - else : - env_str = '#%s -V'%drm_symb[opts.drm] - - # construct the script - addnl_params = [] - for addnl in opts.drm_args : - addnl_params.append('#%s %s'%(drm_symb[opts.drm],addnl)) - addnl_params = '\n'.join(addnl_params) - - job_dict = {'jobname':fname, - 'stdout':stdout, - 'stderr':stderr, - 'command':command, - 'env':env_str, - 'cwd':os.getcwd(), - 'addnl':addnl_params} - - call_script = runscript_tmpl%job_dict - # write the script to file - script_fn = os.path.abspath(jobname+'.script') - with open(script_fn,'w') as f : - f.write(call_script) - - if not opts.wqsub_no_sub : - p = Popen('qsub %s'%f.name,shell=True,stdout=PIPE) - p.wait() - stdout, stderr = 
p.communicate() - if not opts.wqsub_keep_script : - os.remove(f.name) - if opts.wait : - done = False - print 'Waiting on job id %s'%stdout.strip() - while not done : - qstat_p = Popen('qstat %s'%stdout,shell=True,stdout=PIPE,stderr=PIPE) - qstat_p.wait() - if opts.drm == 'TORQUE' : - done = False if qstat_p.returncode != 153 else True - elif opts.drm == 'SGE' : - done = False if qstat_p.returncode != 1 else True - time.sleep(3) # wait three seconds because it's nice - else : - if opts.drm == 'TORQUE' : - print stdout.strip() - elif opts.drm == 'SGE' : - qsub_output_patt = 'Your job (\d+)' - m = re.match(qsub_output_patt,stdout.strip()) - if m is not None: - print m.group(1) - sys.exit(0) - - # might be an array job - qsub_output_patt = 'Your job-array (\d+)\.' - m = re.match(qsub_output_patt,stdout.strip()) - if m is not None: - print m.group(1)
--- a/chipsequtil-master/scripts/wqsub_drmaa.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,98 +0,0 @@ -#!/usr/bin/env python - -from __future__ import with_statement -import os -import sys -from optparse import OptionParser -from subprocess import Popen, PIPE - -import drmaa - -from chipsequtil import get_file_parts - -usage = "[%prog] [options] command" -description = """Submit *command* to a DRMAA-enabled job queueing system. -Output of the command goes to file, stderr is ignored unless specified -as an option. By default, all of the user's environment -variables are imported into job environment.""" -epilog = "Note: this script only works in Unix-style environments." -parser = OptionParser(usage=usage,description=description,epilog=epilog) -parser.add_option('--wqsub-name',dest='wqsub_name',default='wqsub',help='job name to submit as <--wqsub-name>_<first non-whitespace chars in command> [default: %default]') -parser.add_option('--wqsub-stdout',dest='wqsub_stdout',default=None,help='name of file to write stdout to (equivalent to -o argument in SGE) [default: <wqsub-name>_<command>.out]') -parser.add_option('--wqsub-stderr',dest='wqsub_stderr',default=None,help='name of file to write stderr to (equivalent to -e argument in SGE) [default: <wqsub-name>_<command>.err]') -parser.add_option('--wqsub-join',dest='wqsub_join',action='store_true',help='join stdout and stderr into file indicated by --wqsub-stdout (equivalent to -j flag in SGE)') -parser.add_option('--wqsub-no-env',dest='wqsub_no_env',action='store_true',help='do not include any local environment variables in the script') -parser.add_option('--wqsub-wait',dest='wqsub_wait',action='store_true',help='wait for job to finish executing before returning from script') - - -if __name__ == '__main__' : - - # get the wqsub args out first - wqsub_args = [] - other_args = [] - for arg in sys.argv : - if arg.count('wqsub') != 0 or arg in ['-h','--help'] : - wqsub_args.append(arg) - 
else : - other_args.append(arg) - - opts, args = parser.parse_args(wqsub_args) - - if len(other_args) == 0 : - parser.error('Must provide a command') - - # set up job parameters - jobname = opts.wqsub_name+'_'+other_args[0] - stdout_fn = jobname+'.out' - if opts.wqsub_stdout : - stdout_fn = opts.wqsub_stdout - stdout = os.path.abspath(stdout_fn) - - if os.path.exists(stdout) : - os.remove(stdout) - - stderr_fn = jobname+'.err' - if opts.wqsub_stderr : - stderr_fn = opts.wqsub_stderr - stderr = os.path.abspath(stderr_fn) - if os.path.exists(stderr) : - os.remove(stderr) - - # drmaa job submission - session = drmaa.Session() - session.initialize() - - # initialize job template - job_template = session.createJobTemplate() - - # construct DRMAA job - command,args = other_args[0],other_args[1:] - job_template.remoteCommand = command - job_template.args = args - job_template.jobName = jobname - job_template.joinFiles = opts.wqsub_join - - # output and error paths apparently require a ':' in front - job_template.outputPath = ':'+stdout - job_template.errorPath = ':'+stderr - - # get the user's current environment and put it into the execute script - if not opts.wqsub_no_env : - job_template.jobEnvironment = os.environ - - # submit the job and wait for it - jobid = session.runJob(job_template) - - if opts.wqsub_wait : - # submit and wait for job to complete, keyboard interrupt aborts job - try : - - retval = session.wait(jobid, drmaa.Session.TIMEOUT_WAIT_FOREVER) - - except KeyboardInterrupt : - sys.stderr.write('Keyboard interrupt caught (^C), aborting') - pass - - # clean up - session.deleteJobTemplate(job_template) - session.exit()
"""
Top-level package for chipsequtil.

Re-exports the public names of the ``chipsequtil.chipsequtil`` submodule
(file parsers such as BEDFile, MACSFile, KnownGeneFile, GPSFile, and the
org_settings helper functions) at the package level, so callers can write
``from chipsequtil import BEDFile`` directly.
"""

# Python 2 implicit relative import: pulls everything from the sibling
# module chipsequtil/chipsequtil.py into the package namespace.
# NOTE(review): this form is invalid under Python 3 absolute-import rules;
# it would need to become 'from chipsequtil.chipsequtil import *'.
from chipsequtil import *
"""Core file-format parsers and settings utilities for ChIP-Seq analysis.

Provides containers/iterators for GERALD, BED, GPS, MACS, refGene and
knownGene formatted files, helpers for per-organism settings stored in
org_settings.cfg files, and small sequence utilities (reverse complement,
GC content).
"""
import math
import os
import re
import string
import sys

from ConfigParser import ConfigParser
from csv import DictReader
from collections import defaultdict

import chipsequtil

# for RefGeneDB
from util import KeyedBinaryTree


def get_file_parts(path) :
    """For <path>/<basename>.<ext>, returns 4-tuple (<path>,<basename>.<ext>,<basename>,<ext>)"""
    path,fn = os.path.split(path)
    basename,ext = os.path.splitext(fn)
    return path,fn,basename,ext

def parse_number(n) :
    """Try to cast input to float (if it contains a '.') or int, returning
    the value unchanged if the cast fails."""
    try :
        return float(n) if '.' in n else int(n)
    except (TypeError, ValueError) :
        # non-numeric string, or non-string input (TypeError from ''.in'')
        return n


def gerald_to_bed(gerald,min_fields=False) :
    """Convert a GERALDOutput object into a BEDOutput object

    Keyword argument *min_fields* produces BED alignment with only the first
    three fields populated
    """

    d = {}.fromkeys(BEDOutput.FIELD_NAMES,'')

    # required BED fields
    d['chrom'] = gerald.match_chromo
    d['chromStart'] = gerald.match_pos
    d['chromEnd'] = gerald.match_pos+len(gerald.read)

    # load the remaining information
    if not min_fields :
        # GERALD strand is F/R, BED is +/-
        d['strand'] = '+' if gerald.match_strand == 'F' else '-'
        # TODO consider encoding single-read alignment score into BED score format

    return BEDOutput(**d)


class GERALDOutput :
    """Container for one line of GERALD alignment output as generated by Illumina
    pipeline version >= 1.3."""

    FIELD_NAMES = ['machine',
                   'run_number',
                   'lane',
                   'tile',
                   'x_coord',
                   'y_coord',
                   'index',
                   'read_no',
                   'read',
                   'quality_string',
                   'match_chromo',
                   'match_contig',
                   'match_pos',
                   'match_strand',
                   'match_desc',
                   'single_read_score',
                   'paired_read_score',
                   'partner_chromo',
                   'partner_contig',
                   'partner_offset',
                   'partner_strand',
                   'filtering',
                  ]

    def __init__(self,line) :
        """*line* is a raw tab-delimited GERALD record or a pre-split field list."""
        if type(line) == str :
            line = line.strip().split('\t')

        if len(line) != len(GERALDOutput.FIELD_NAMES) :
            raise GERALDOutput.FormatException(
                'Expected %d fields in input, found %d in line: %s'%(
                    len(GERALDOutput.FIELD_NAMES),len(line),line))

        # numeric-looking fields are stored as int/float, others as str
        for fn,d in zip(GERALDOutput.FIELD_NAMES,line) :
            setattr(self,fn,parse_number(d))

    def __repr__(self) :
        return 'GERALDOutput(%s)'%repr(self.output_format())

    def output_format(self) :
        """Tab delimited string of fields as they would appear in GERALD output file"""
        return '\t'.join([str(getattr(self,d)) for d in GERALDOutput.FIELD_NAMES])+'\n'

    class FormatException(Exception) :
        """GERALD format exception, raised on malformatted input"""
        pass


class SmartFileIter :
    r"""An 'abstract' class implementing a smart file iterator.  It is essentially
    a wrapper around a csv.DictReader object that parses fields into Python
    datatypes (int, float, tuple, objects, etc) as they are iterated.  The
    constructor argument *f* can be either a valid filename or a file-like
    object.  This class should not be directly instantiated - rather it should
    be subclassed with FIELD_NAMES and FIELD_TYPES defined.  FIELD_NAMES is a
    list of strings referring to the names of the fields, FIELD_TYPES is a list
    of the same length of callables that will parse the column into the desired
    format.  Example::

      >>> s = StringIO('chr1\t0\t100\t+\nchr3\t300\t601\t-\n')
      >>> class IntervalFile(SmartFileIter):
              r'''A SmartFileIter for files with lines formatted like:
              chrom\tstart\tend\tstrand'''
              FIELD_NAMES = ['chrom','start','end','strand']
              FIELD_TYPES= [str,int,int,lambda x: 0 if x == '+' else 1]
      >>> f = IntervalFile(s)
      >>> for r in f :
              print r['chrom'], 'length: ', r['end']-r['start'], 'strand: ',r['strand']

    ``r['start']`` and ``r['end']`` are automatically available as integers,
    so the subtraction works as expected.  Arbitrary functions that accept a
    single argument and return a value may also be specified.
    """

    def __init__(self,f,skip_line_chars='#') :
        if not hasattr(self,'FIELD_NAMES') or not hasattr(self,'FIELD_TYPES') :
            raise Exception('Subclasses must define class members FIELD_NAMES and FIELD_TYPES')
        if isinstance(f,str) :
            f = open(f)
        self._dict_reader = DictReader(f,delimiter='\t',fieldnames=self.FIELD_NAMES)
        self.fieldnames = self.FIELD_NAMES
        self.curr_line = self._dict_reader.next()
        self.skip_line_chars = skip_line_chars

        # skip initial comment lines
        while self.curr_line[self.FIELD_NAMES[0]][0] in self.skip_line_chars :
            self.curr_line = self._dict_reader.next()

        # skip a header row that just repeats the field names
        if self.FIELD_NAMES[0] in self.curr_line.values() :
            self.curr_line = self._dict_reader.next()

    def __iter__(self) :
        return self

    def __getattr__(self,attr) :
        # delegate unknown attributes to the underlying DictReader
        try:
            return self.__dict__[attr]
        except KeyError :
            return getattr(self._dict_reader,attr)

    def next(self) :
        """Emit the next record in the file as a dictionary with parsed values"""

        if self.curr_line is None :
            raise StopIteration()

        line = self.curr_line

        # check for comment
        while line[self.FIELD_NAMES[0]][0] in self.skip_line_chars :
            line = self.curr_line = self._dict_reader.next()

        for k,f in zip(self.FIELD_NAMES, self.FIELD_TYPES) :
            try :
                line[k] = f(line[k])
            except Exception :
                # leave the raw string in place when the parser fails
                line[k] = line[k]

        # read one record ahead so the last record can end iteration cleanly
        try :
            self.curr_line = self._dict_reader.next()
        except StopIteration :
            self.curr_line = None

        return line


class BEDOutput :
    """*Deprecated*: Use *BEDFile* instead.

    Container for one line of BED alignment output"""

    FIELD_NAMES = ['chrom',
                   'chromStart',
                   'chromEnd',
                   'name',
                   'score',
                   'strand',
                   'thickStart',
                   'thickEnd',
                   'itemRgb',
                   'blockCount',
                   'blockSizes',
                   'blockStarts',
                  ]

    def __init__(self,line='',*args,**kwargs) :
        """*line* is a raw BED line or pre-split field list; keyword arguments
        named after FIELD_NAMES override values parsed from *line*."""
        if type(line) == str :
            line = line.strip().split('\t')

        # BED requires at least chrom, chromStart, chromEnd
        if len(line) < 3 and any([x not in kwargs.keys() for x in ['chrom','chromStart','chromEnd']]) :
            raise BEDOutput.FormatException(
                'Format requires at least 3 fields in input, found %d in line: %s'%(len(line),line))
        if len(line) > len(BEDOutput.FIELD_NAMES) :
            raise BEDOutput.FormatException(
                'Format requires at most %d fields in input, found %d in line: %s'%(
                    len(BEDOutput.FIELD_NAMES),len(line),line))

        # pad out missing trailing fields with empty strings
        empty_fields = ['']*(len(BEDOutput.FIELD_NAMES)-len(line))
        for fn,d in zip(BEDOutput.FIELD_NAMES,line+empty_fields) :
            setattr(self,fn,parse_number(d))

        # kwargs override line input
        for k,v in kwargs.items() :
            setattr(self,k,parse_number(v))

    def __repr__(self) :
        return 'BEDOutput(%s)'%(repr(self.output_format()))

    def output_format(self) :
        """Returns a string for the BED line as it would appear in a file"""
        return '\t'.join([str(getattr(self,d)) for d in BEDOutput.FIELD_NAMES])+'\n'

    class FormatException(Exception) :
        """BED format exception, raised on malformatted input"""
        pass


class BEDFile(SmartFileIter) :
    '''An iterable object containing the records in the supplied BED formatted
    file.  Fieldnames are::

      FIELD_NAMES = ['chrom',
                     'chromStart',
                     'chromEnd',
                     'name',
                     'score',
                     'strand',
                     'thickStart',
                     'thickEnd',
                     'itemRgb',
                     'blockCount',
                     'blockSizes',
                     'blockStarts',
                    ]
    '''

    FIELD_NAMES = BEDOutput.FIELD_NAMES
    FIELD_TYPES = [str,int,int,str,float,str,int,int,str,lambda x: x.split(','), lambda x: x.split(','), lambda x: x.split(',')]


class BEDFile_dictreader(DictReader) :
    '''An iterable object (subclasses csv.DictReader) containing the records in
    the supplied BED formatted file.'''
    FIELD_NAMES = BEDOutput.FIELD_NAMES
    def __init__(self,bed) :
        '''*bed* is either a filename or a file-like object representing a BED file'''
        if isinstance(bed,str) :
            bed = open(bed)
        DictReader.__init__(self,bed,delimiter='\t',
                            fieldnames=BEDOutput.FIELD_NAMES)


class GPSFile(SmartFileIter) :
    '''An iterable object containing the records in the peaks file format
    generated by GPS.  Fieldnames are::

      FIELD_NAMES = ["Position",
                     "IP",
                     "Control",
                     "Fold",
                     "Q_-lg10",
                     "P_-lg10",
                     "IPvsEMP",
                     "IPvsCTR",
                     "blank"
                    ]
    '''

    FIELD_NAMES = ["Position",
                   "IP",
                   "Control",
                   "Fold",
                   "Q_-lg10",
                   "P_-lg10",
                   "IPvsEMP",
                   "IPvsCTR",
                   "blank"
                  ]

    # Position 'chrom:coord' is parsed into ('chr<chrom>', coord, original)
    FIELD_TYPES = [lambda x: ('chr%s'%x.split(':')[0],int(x.split(':')[1]),x),
                   float,
                   float,
                   float,
                   float,
                   float,
                   float,
                   float,
                   str
                  ]

    def __init__(self,gps_fn) :
        f = open(gps_fn)
        SmartFileIter.__init__(self,f)


class AffyBiocFile(DictReader) :
    '''An iterable object (subclasses csv.DictReader) containing microarray data records in
    the supplied bioconductor formatted file.'''

    FIELD_NAMES = [ 'ID',
                    'Symbol',
                    'Name',
                    'M',
                    'A',
                    't',
                    'P.Value',
                    'B'
                  ]

    def __init__(self,affyfn) :
        '''*affyfn* is either a filename or a file-like object representing a bioconductor output file'''
        # BUGFIX: original referenced an unbound name ('bed = open(bed)',
        # NameError on any call) and registered BEDOutput.FIELD_NAMES
        # instead of this class's own field names
        if isinstance(affyfn,str) :
            affyfn = open(affyfn)
        DictReader.__init__(self,affyfn,delimiter='\t',
                            fieldnames=AffyBiocFile.FIELD_NAMES)


class RefGeneOutput(object) :
    """Field names for UCSC refGene annotation records."""
    # http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/refGene.sql
    FIELD_NAMES = ['bin',
                   'name',
                   'chrom',
                   'strand',
                   'txStart',
                   'txEnd',
                   'cdsStart',
                   'cdsEnd',
                   'exonCount',
                   'exonStarts',
                   'exonEnds',
                   'score',
                   'name2',
                   'cdsStartStat',
                   'cdsEndStat',
                   'exonFrames',]


class RefGeneFile(DictReader) :
    '''An iterable object (subclasses csv.DictReader) containing the records in
    the supplied refGene formatted file'''
    def __init__(self,refGene_fn) :
        refGene_f = open(refGene_fn)
        # check for header
        first_line = refGene_f.next()
        if not first_line.strip().startswith('#') :
            refGene_f.seek(0) # first line not header, reset the file pointer
        DictReader.__init__(self,refGene_f,delimiter='\t',fieldnames=RefGeneOutput.FIELD_NAMES)

class RefGeneFile_nottested(SmartFileIter) :
    '''An iterable object containing the records in the supplied UCSC RefGene
    refFlat formatted file (see e.g.
    http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/refGene.sql)'''
    FIELD_NAMES = ['bin',
                   'name',
                   'chrom',
                   'strand',
                   'txStart',
                   'txEnd',
                   'cdsStart',
                   'cdsEnd',
                   'exonCount',
                   'exonStarts',
                   'exonEnds',
                   'score',
                   'name2',
                   'cdsStartStat',
                   'cdsEndStat',
                   'exonFrames',]
    FIELD_TYPES = [str,str,str,str,int,int,int,int,int,
                   lambda x: [int(y) for y in x.split(',') if len(y) > 0],
                   lambda x: [int(y) for y in x.split(',') if len(y) > 0],
                   float,
                   str,str,str,str]

class KnownGeneFile(SmartFileIter) :
    '''An iterable that parses UCSC's KnownGene gene annotation files.  Field
    names are::

      FIELD_NAMES = [ 'name',
                      'chrom',
                      'strand',
                      'txStart',
                      'txEnd',
                      'cdsStart',
                      'cdsEnd',
                      'exonCount',
                      'exonStarts',
                      'exonEnds',
                      'proteinID',
                      'alignID',
                    ]
    '''

    FIELD_NAMES = [ 'name',
                    'chrom',
                    'strand',
                    'txStart',
                    'txEnd',
                    'cdsStart',
                    'cdsEnd',
                    'exonCount',
                    'exonStarts',
                    'exonEnds',
                    'proteinID',
                    'alignID',
                  ]

    # function pointers for correct formatting of field names
    FIELD_TYPES = [ str,
                    str,
                    str,
                    int,
                    int,
                    int,
                    int,
                    lambda x: [int(y) for y in x.split(',') if len(y) > 0],
                    lambda x: [int(y) for y in x.split(',') if len(y) > 0],
                    lambda x: [int(y) for y in x.split(',') if len(y) > 0],
                    str,
                    str,
                  ]

    def __init__(self,kg_fn) :
        # NOTE(review): deliberately does NOT call SmartFileIter.__init__,
        # so comment/header skipping is bypassed for KnownGene files
        self.meta_data = []
        self.file_info = {}
        f = open(kg_fn)
        self._dict_reader = DictReader(f,delimiter='\t',fieldnames=KnownGeneFile.FIELD_NAMES)

    def __iter__(self) :
        return self

    def next(self) :
        line = self._dict_reader.next()
        for k,f in zip(self.FIELD_NAMES,self.FIELD_TYPES) :
            line[k] = f(line[k])
        return line


#TODO maybe, finish this
class RefGeneDB :
    '''A class for querying RefGene annotation files. NOT DONE.'''

    def __init__(self,refgene_fn) :
        self._chrom_trees = defaultdict(KeyedBinaryTree)
        refgene_f = RefGeneFile(refgene_fn)
        genes = defaultdict(list)
        for gene in refgene_f :
            genes[gene['chrom']].append(gene)

        # do stuff to ensure a balanced tree for each chromosome
        for chrom,gene_list in genes.items() :
            gene_list.sort(key=lambda x: int(x['txStart']))
            first_half, second_half = gene_list[:len(gene_list)/2],gene_list[len(gene_list)/2:]
            first_half.reverse()
            # BUGFIX: original called len(first_half,second_half) which is a
            # TypeError; take the min of the two list lengths
            for i in range(min(len(first_half),len(second_half))) :
                # NOTE(review): pop(i) from a shrinking list skips elements;
                # pop(0) may have been intended - class is marked NOT DONE
                to_add = first_half.pop(i)
                self._chrom_trees[chrom].addNode(int(to_add['txStart']),to_add)


class MACSFile(SmartFileIter) :
    '''An iterable object containing the records in the supplied MACS peak file.

    This class parses the comments found in the header of MACS peak files and
    extracts metadata into the member dictionary **file_info** (e.g. 'MACS
    version', 'name', 'tag size', 'total tags in treatment', 'd', ...).  The
    complete header can be found as a list in the **meta_data** member with
    one comment per item.  The field names available are::

      FIELD_NAMES = ['chr',
                     'start',
                     'end',
                     'length',
                     'summit',
                     'tags',
                     '-10*log10(pvalue)',
                     'fold_enrichment',
                     'FDR(%)',
                    ]

    '''
    FIELD_NAMES = ['chr',
                   'start',
                   'end',
                   'length',
                   'summit',
                   'tags',
                   '-10*log10(pvalue)',
                   'fold_enrichment',
                   'FDR(%)',
                  ]

    FIELD_TYPES = [str,
                   int,
                   int,
                   int,
                   int,
                   int,
                   float,
                   float,
                   float
                  ]

    # each regex captures (metadata key, value) from one MACS header comment
    _METADATA_REGEXES = [
        u'# This file is generated by (MACS version) (.*)',
        u'# (name) = (.*)',
        u'# (format) = (.*)',
        u'# (ChIP-seq file) = (.*)',
        u'# (control file) = (.*)',
        u'# (effective genome size) = (.*)',
        u'# (band width) = (\d+)',
        u'# (model fold) = (.*)',
        u'# (pvalue cutoff) = (.*)',
        u'# (Range for calculating regional lambda) is: (.*)',
        u'# (tag size) is determined as (\d+) bps',
        u'# (total tags in treatment): (\d+)',
        u'# (tags after filtering in treatment): (\d+)',
        u'# (maximum duplicate tags at the same position in treatment) = (\d+)',
        u'# (Redundant rate in treatment): (.*)',
        u'# (total tags in control): (.*)',
        u'# (tags after filtering in control): (.*)',
        u'# (maximum duplicate tags at the same position in control) = (\d+)',
        u'# (Redundant rate in control): (.*)',
        u'# (d) = (\d+)'
        ]

    def __init__(self,macs_fn) :
        self.meta_data = []
        self.file_info = {}
        if isinstance(macs_fn,str) :
            f = open(macs_fn)
        else :
            f = macs_fn
        # consume the comment header, harvesting metadata as we go; the
        # header ends at the column-name row
        done_with_header = False
        while not done_with_header :
            l = f.next().strip()
            if l.startswith('#') :
                for regex in MACSFile._METADATA_REGEXES :
                    m = re.search(regex,l)
                    if m is not None :
                        self.file_info[m.group(1).strip()] = parse_number(m.group(2).strip())
                self.meta_data.append(l)
            elif l.startswith('\t'.join(MACSOutput.FIELD_NAMES[:5])) :
                self.meta_data.append(l)
                done_with_header = True

        SmartFileIter.__init__(self,f)


# for backwards compatibility, use MACSFile instead...?
class MACSOutput(object) :
    FIELD_NAMES = MACSFile.FIELD_NAMES

# settings file locations: package-wide and per-user
GLOBAL_SETTINGS_FN = os.path.join(os.path.split(chipsequtil.__file__)[0],'org_settings.cfg')
LOCAL_SETTINGS_FN = os.path.expanduser(os.path.join('~','.org_settings.cfg'))
_ALL_SETTINGS, _LOCAL_SETTINGS, _GLOBAL_SETTINGS = range(3)

def _get_org_settings(org_key=None,addnl_configs=[],src=_ALL_SETTINGS) :
    """Utility function used by get_org_settings and get_all_settings, should \
not be called directly"""

    config = ConfigParser()
    # local settings are read first so ConfigParser lets global values
    # override per the order passed to config.read
    conf_fns = []
    if src in [_LOCAL_SETTINGS, _ALL_SETTINGS] :
        conf_fns.append(LOCAL_SETTINGS_FN)
    if src in [_GLOBAL_SETTINGS, _ALL_SETTINGS] :
        conf_fns.append(GLOBAL_SETTINGS_FN)
    config.read(conf_fns+addnl_configs)

    d = {}
    if org_key is None :
        for sec in config.sections() :
            # try to cast numeric-looking arguments into float, int
            d[sec] = dict([(k,parse_number(v)) for k,v in config.items(sec)])
    else :
        d = dict([(k,parse_number(v)) for k,v in config.items(org_key)])

    return d


def get_org_settings(org_key,addnl_configs=[]) :
    '''Returns a dict of setting/path values for a given organism as specified
    in system-wide and user's settings.  *org_key* is the organism name as found
    in the config file, *e.g.* mm9.  *addnl_configs* are filenames of other
    configuration files to add to the set of settings, usually not needed.

    Example usage::

      >>> get_org_settings('mm9')['genome_dir']
      '/nfs/genomes/mouse_gp_jul_07'

    '''
    return _get_org_settings(org_key,addnl_configs=addnl_configs)


def get_all_settings(addnl_configs=[]) :
    '''Returns a dict of setting/path values for every organism as specified in
    system-wide and user's settings.'''
    return _get_org_settings(None,addnl_configs=addnl_configs)


def get_global_settings() :
    '''Returns a dict of the global setting/path values installed with the
    package.'''
    return _get_org_settings(None,src=_GLOBAL_SETTINGS)


def get_local_settings() :
    '''Returns a dict of the current user's setting/path values taken from
    ~/.org_settings.cfg if it exists.'''
    return _get_org_settings(None,src=_LOCAL_SETTINGS)


def check_org_settings(org_key,setting_list) :
    '''Returns true if all setting names in *setting_list* are found in the
    org settings for organism *org_key* and false otherwise.  Mostly used
    internally to sanity check org settings.'''
    settings = get_org_settings(org_key)
    return all([s in settings.keys() for s in setting_list])


# translation table mapping each nucleotide to its complement, case-preserving
RC_MAP = string.maketrans('acgtACGT','tgcaTGCA')
def reverse_complement(seq) :
    """Reverse complements nucleotide string *seq*. Leaves non-nucleotide characters unaffected."""
    return seq.translate(RC_MAP)[::-1]


def get_gc_content(seq) :
    '''returns the GC content of a DNA sequence as python string'''
    # NOTE(review): raises ZeroDivisionError on an empty sequence
    seq = seq.lower()
    return (seq.count('c')+seq.count('g'))/float(len(seq))


def get_gc_content_distribution(sequences,bins=100) :
    '''returns a list of *bins* normalized counts approximating the GC content
    distribution of the provided sequences. Approximation is performed by binning.'''
    gc_contents = [get_gc_content(s) for s in sequences]
    gc_contents.sort()

    # count up the sequences for each bin
    bin_counts = [0.]*bins
    for c in gc_contents :
        # BUGFIX: clamp so a GC content of exactly 1.0 falls in the last
        # bin instead of raising IndexError
        sample_bin = min(int(math.floor(c*bins)),bins-1)
        bin_counts[sample_bin] += 1

    # normalize bin counts
    norm_bins = [x/len(sequences) for x in bin_counts]

    # create a closure for this set of sequences
    #def f(seq) :
    #    gc = get_gc_content(seq)
    #    return norm_bins[int(math.floor(gc*bins))]

    return norm_bins


def get_size_distribution(sequences) :
    # NOTE(review): returns a generator of lengths, not a distribution
    return (len(s) for s in sequences)
--- a/chipsequtil-master/src/chipsequtil/motiftools.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2064 +0,0 @@ -""" -There is a large number of functions and member fucntions here. To get started, -a motif can be instantiated by providing an ambiguity code, a set of aligned DNA -sequences, or from matrices of counts, probabilities or log-likelihoods (akaPSSMs). - ->>> m = MotifTools.Motif_from_text('TGAAACANNSYWT') ->>> print m.oneletter() -TGAAACA..sywT - -Lower case reflects lower information content. For a more detailed view of the distribution -of information, try this:: - - >>> m.textlogo() - # -- 2.30 bits - # - # TGAAACA T - # TGAAACA T - # TGAAACA T - # TGAAACA T - # TGAAACA CCAT - # TGAAACA CCAT - # TGAAACA GTTT - # TGAAACA GTTT -- 0.23 bits - # ------------- - # TGAAACA..sywT - - -Motif objects may be manipulated largely like text strings (with pythonic -indexing):: - - >>> print m[4:5].oneletter - A - >>> print m[4:7].oneletter - ACA - >>> print (m[4:7] + m[1:2]).oneletter - ACAG - >>> print (m[4:7] + m[1:7]).oneletter - ACAGAAACA - -and even padded with blanks:: - - >>> print m[-4:7] - ...TGAAACA - -.. Copyright (2005) Whitehead Institute for Biomedical Research -.. 
All Rights Reserved - -Author: David Benjamin Gordon - -Modified by: Adam Labadorf - -""" -import copy -import math -import os -import pickle -import re -import string -import sys -import tempfile - -pysum = sum - -from random import random,shuffle -from subprocess import call - -from chipsequtil import reverse_complement -class MotifToolsException(Exception) : pass - -one2two = { 'W':'AT', 'M':'AC', 'R':'AG', - 'S':'CG', 'Y':'CT', 'K':'GT'} -two2one = { 'AT': 'W', 'AC': 'M', 'AG': 'R', - 'CG': 'S', 'CT': 'Y', 'GT': 'K'} -revcomp = { 'A':'T', 'T':'A', 'C':'G', 'G':'C', - 'W':'W', 'S':'S', 'K':'M', 'M':'K', - 'Y':'R', 'R':'Y', 'N':'N', - 'B':'N', 'D':'N', 'H':'N', 'V':'N', ' ':'N'} #[12-11-02] Needs fixing - -ACGT = list('ACGT') -YEAST_BG = {'A': 0.31, 'C': .19, 'G': .19, 'T': .31} #Yeast default background freqs - -revcomplement_memo = {'A':'T'} -revcompTBL = string.maketrans("AGCTagctWSKMYRnN", "TCGAtcgaWSMKTYnN") -def revcomplement(seq): - """A quick reverse-complement routine that memo-izes queries, understands - IUPAC ambiguity codes, and preserves case.""" - global revcomplement_memo - try: - rc = revcomplement_memo[seq] - except KeyError: - #_t = map(lambda x,D=revcomp: D[x], seq) - #get = revcomp.get - #_t = map(get, seq) - _t = list(seq.translate(revcompTBL)) - _t.reverse() - rc = ''.join(_t) - revcomplement_memo[seq] = rc - revcomplement_memo[rc] = seq - return rc - - -def Motif_from_ll(ll): - """Constructs a motif object from a log-likelihood matrix, which is in the - form of a list of dictionaries.""" - m = Motif(None,None) - m.compute_from_ll(ll) - return m - -def Motif_from_counts(countmat,beta=0.01,bg={'A':.25,'C':.25,'G':.25,'T':.25}): - """ - Construct a Motif object from a matrix of counts (or probabilities or frequencies). - A default set of uniform background frequencies may be overridden. 
- - beta refers to the number of pseudocounts that should be distributed over each position - of the PSSM.""" - m = Motif('',bg) - m.compute_from_counts(countmat,beta) - return m - -def Motif_from_text(text,beta=0.05,source='',bg=None): - """Construct a Motif object from a text string constructed from IUPAC - ambiguity codes. - - A default set of uniform background frequencies may be overridden with - a dictionary of the form {'A':.25,'C':.25,'G':.25,'T':.25}). - - beta refers to the number of pseudocounts that should be distributed over each position - of the PSSM.""" - if not bg: bg={'A':.25,'C':.25,'G':.25,'T':.25} - m = Motif('',bg) - m.compute_from_text(text,beta) - m.source = source - return m - -def copy(motif): - """Utility routine for copying motifs""" - a = copy.deepcopy(motif) - #a.__dict__ = motif.__dict__.copy() - return a - -class Motif: - """A pssm model, with scanning, storing, loading, and other operations. A - uniform nucleotide background is assumed if none is provided.""" - def __init__(self,list_of_seqs_or_text=[],backgroundD=None): - self.MAP = 0 - self.evalue = None - self.oneletter = '' - self.nseqs = 0 - self.counts = [] - self.width = 0 - self.fracs = [] - self.logP = [] - self.ll = [] - self.bits = [] - self.totalbits = 0 - self.maxscore = 0 - self.minscore = 0 - self.pvalue = 1 - self.pvalue_rank = 1 - self.church = None - self.church_rank = 1 - self.Cpvalue = 1 - self.Cpvalue_rank= 1 - self.Cchurch = 1 - self.Cchurch_rank= 1 - self.binomial = None - self.binomial_rank=1 - self.E_seq = None - self.frac = None - self.E_site = None - self.E_chi2 = None - self.kellis = None - self.MNCP = None - self.ROC_auc = None - self.realpvalue = None - self.Cfrac = None - self.CRA = None - self.valid = None - self.seeddist = 0 - self.seednum = -1 - self.seedtxt = None - self.family = None - self.source = None - self.threshold = None - self._bestseqs = None - self.bgscale = 1 - self.best_pvalue = None - self.best_factor = None - self.gamma = None - 
self.nbound = 0 - self.matchids = [] - self.overlap = None - self.cumP = [] - self.numbound = 0 - self.nummotif = 0 - self.numboundmotif = 0 - self.dataset = None - self.bgfile = None - self.cverror = None - self.beta = None - self.match_thresh = None - self.progscore = None - if backgroundD: - self.background = backgroundD - else: - #self.background = {'A': 0.31, 'C': .19, 'G': .19, 'T': .31} #Yeast Default - self.background = {'A':.25,'C':.25,'G':.25,'T':.25} # uniform background - - if type(list_of_seqs_or_text) == type(''): - self.seqs = [] - text = list_of_seqs_or_text - self.compute_from_text(text) - else: - self.seqs = list_of_seqs_or_text - if self.seqs: - self._parse_seqs(list_of_seqs_or_text) - self._compute_ll() - self._compute_oneletter() - #self._compute_threshold(2.0) - - def __repr__(self): - return "%s (%d)"%(self.oneletter, self.nseqs) - - def __str__(self): - return "%s (%d)"%(self.oneletter, self.nseqs) - - def summary(self): - """return a text string one-line summary of motif and its metrics""" - m = self - txt = "%-34s (Bits: %5.2f MAP: %7.2f D: %5.3f %3d) E: %7.3f"%( - m, m.totalbits, m.MAP, m.seeddist, m.seednum, nlog10(m.pvalue)) - if m.binomial!=None: txt = txt + ' Bi: %6.2f'%(nlog10(m.binomial)) - if m.church != None: txt = txt + ' ch: %6.2f'%(nlog10(m.church)) - if m.frac != None: txt = txt + ' f: %5.3f'%(m.frac) - if m.E_site != None: txt = txt + ' Es: %6.2f'%(nlog10(m.E_site)) - if m.E_seq != None: txt = txt + ' Eq: %6.2f'%(nlog10(m.E_seq)) - if m.MNCP != None: txt = txt + ' mn: %6.2f'%(m.MNCP) - if m.ROC_auc!= None: txt = txt + ' Ra: %6.4f'%(m.ROC_auc) - if m.E_chi2 != None: - if m.E_chi2 == 0: m.E_chi2=1e-20 - txt = txt + ' x2: %5.2f'%(nlog10(m.E_chi2)) - if m.CRA != None: txt = txt + ' cR: %6.4f'%(m.CRA) - if m.Cfrac != None: txt = txt + ' Cf: %5.3f'%(m.Cfrac) - if m.realpvalue != None: txt = txt + ' P: %6.4e'%(m.realpvalue) - if m.kellis != None: txt = txt + ' k: %6.2f'%(m.kellis) - if m.numbound : txt = txt + ' b: %3d'%(m.numbound) 
- if m.nummotif : txt = txt + ' nG: %3d'%(m.nummotif) - if m.numboundmotif : txt = txt + ' bn: %3d'%(m.numboundmotif) - - return txt - - def minimal_raw_seqs(self): - '''return minimal list of seqs that represent consensus ''' - seqs = [[], []] - for letter in self.oneletter: - if one2two.has_key(letter): - seqs[0].append(one2two[letter][0]) - seqs[1].append(one2two[letter][1]) - else: - seqs[0].append(letter) - seqs[1].append(letter) - if ''.join(seqs[0]) == ''.join(seqs[1]): - return [''.join(seqs[0])] - else: - return [''.join(seqs[0]), ''.join(seqs[0])] - def _compute_oneletter(self): - """set the oneletter member variable""" - letters = [] - for i in range(self.width): - downcase = None - if self.bits[i] < 0.25: - letters.append('.') - continue - if self.bits[i] < 1.0: downcase = 'True' - tups = [(self.ll[i][x],x) for x in ACGT if self.ll[i][x] > 0.0] - if not tups: #Kludge if all values are negative (can this really happen?) - tups = [(self.ll[i][x],x) for x in ACGT] - tups.sort() - tups.reverse() - tups = [tups[0]] - downcase = 'True' - tups.sort() #Rank by LL - tups.reverse() - bases = [x[1] for x in tups[0:2]] - bases.sort() - if len(bases) == 2: L = two2one[''.join(bases)] - else: L = bases[0] - if downcase: L = L.lower() - letters.append(L) - self.oneletter = ''.join(letters) - def _parse_seqs(self, LOS): - """build a matrix of counts from a list of sequences""" - self.nseqs = len(LOS) - self.width = len(LOS[0]) - for i in range(self.width): - Dc = {'A': 0, 'C': 0, 'T': 0, 'G': 0, 'N': 0} - for seq in LOS: - key = seq[i] - Dc[key] = Dc[key] + 1 - del(Dc['N']) - self.counts.append(Dc) - - def _compute_ll(self): - """compute the log-likelihood matrix from the count matrix""" - self.fracs = [] - self.logP = [] - self.ll = [] - for i in range(self.width): - - Dll = {'A': 0, 'C': 0, 'T': 0, 'G': 0} - Df = {'A': 0, 'C': 0, 'T': 0, 'G': 0} - DlogP= {'A': 0, 'C': 0, 'T': 0, 'G': 0} - - for nuc in self.counts[i].keys(): - - #print 
i,nuc,self.counts[i][nuc],self.nseqs - # Dll[nuc] = log2( position nucleotide count/background sequence count ) - # Dll[nuc] = log2( (count[nuc]+bgscale*bg[nuc])/(bg[nuc]*(num_seqs+bgscale)) ) - - pos_nuc_count = self.counts[i][nuc] + self.bgscale*self.background.get(nuc,0.) - adj_all_nuc_count = (self.nseqs + self.bgscale) * self.background.get(nuc,1e-10) - - Dll[nuc] = math.log(pos_nuc_count/adj_all_nuc_count,2) - - Pij = self.counts[i][nuc] / float(self.nseqs) - Df [nuc] = Pij - if Pij > 0: - DlogP[nuc] = math.log(Pij) / math.log(2.) - else: - DlogP[nuc] = -100 #Near zero - - self.fracs.append(Df) - self.logP.append (DlogP) - self.ll.append (Dll) - self.P = self.fracs - self._compute_bits() - self._compute_ambig_ll() - self._maxscore() - - - def compute_from_ll(self,ll): - """build motif from an inputed log-likelihood matrix - - (This function reverse-calculates the probability matrix and background frequencies - that were used to construct the log-likelihood matrix) - """ - self.ll = ll - self.width = len(ll) - self._compute_bg_from_ll() - self._compute_logP_from_ll() - self._compute_ambig_ll() - self._compute_bits() - self._compute_oneletter() - self._maxscore() - - def _computeP(self): - """compute the probability matrix (from the internal log-probability matrix)""" - P = [] - for i in range(self.width): - #print i, - _p = {} - for L in ACGT: _p[L] = math.pow(2.0,self.logP[i][L]) - P.append(_p) - #print - self.P = P - - def _compute_bits(self): - """set m.totbits to the number of bits and m.bits to a list of bits at - each position""" - bits = [] - totbits = 0 - bgbits = 0 - bg = self.background - UNCERT = lambda x: x*math.log(x)/math.log(2.0) - for letter in ACGT: - bgbits = bgbits + UNCERT(bg[letter]) - for i in range(self.width): - tot = 0 - for letter in ACGT: - Pij = pow(2.0, self.logP[i][letter]) - tot = tot + UNCERT(Pij) - #bit = Pij * self.ll[i][letter] - #if bit > 0: - # tot = tot + bit - #print tot, bgbits, tot-bgbits - 
bits.append(max(0,tot-bgbits)) - totbits = totbits + max(0,tot-bgbits) - self.bits = bits - self.totalbits = totbits - - - def denoise(self,bitthresh=0.5): - """set low-information positions (below bitthresh) to Ns""" - for i in range(self.width): - tot = 0 - for letter in ACGT: - if self.logP: - Pij = pow(2.0, self.logP[i][letter]) - else: - Pij = pow(2.0, self.ll[i][letter]) * self.background[letter] - if Pij > 0.01: - bit = Pij * self.ll[i][letter] - tot = tot + bit - if tot < bitthresh: #Zero Column - for letter in ACGT: - self.ll[i][letter] = 0.0 - self.compute_from_ll(self.ll) - - def giflogo(self,id,title=None,scale=0.8,info_str=''): - """make a gif sequence logo""" - return giflogo(self,id,title,scale) - - def printlogo(self,norm=2.3, height=10.0): - """print a text-rendering of the Motif Logo - - norm - maximum number of bits to show - height - number of lines of text to use to render logo - """ - self._print_bits(norm,height) - def print_textlogo(self,norm=2.3, height=8.0): - """print a text-rendering of the Motif Logo - - norm - maximum number of bits to show - height - number of lines of text to use to render logo - """ - self._print_bits(norm,height) - def _print_bits(self,norm=2.3, height=8.0): - """print a text-rendering of the Motif Logo - - norm - maximum number of bits to show - height - number of lines of text to use to render logo - """ - bits = [] - tots = [] - str = [] - for i in range(self.width): - D = {} - tot = 0 - for letter in ['A', 'C', 'T', 'G']: - if self.logP: - Pij = pow(2.0, self.logP[i][letter]) - else: - Pij = pow(2.0, self.ll[i][letter]) * self.background[letter] - if Pij > 0.01: - '''Old''' - D[letter] = Pij * self.ll[i][letter] - #'''new''' - #Q = self.background[letter] - #D[letter] = ( Pij * math.log(Pij) - Pij * math.log(Q) ) / math.log(2.0) - '''for both old and new''' - tot = tot + D[letter] - bits.append(D) - tots.append(tot) - for i in range(self.width): - s = [] - _l = bits[i].keys() - _l.sort(lambda x,y,D=bits[i]: 
cmp(D[y],D[x])) - for key in _l: - for j in range(int(bits[i][key] / norm * height)): - s.append(key) - str.append(''.join(s)) - fmt = '%%%ds'%height - print '# %s'%('-'*self.width) - for h in range(int(height)): - sys.stdout.write("# ") - for i in range(self.width): - sys.stdout.write((fmt%str[i])[h]) - if h == 0: - sys.stdout.write(' -- %4.2f bits\n'%norm) - elif h == height-1: - sys.stdout.write(' -- %4.2f bits\n'%(norm/height)) - else: - sys.stdout.write('\n') - print '# %s'%('-'*self.width) - print '# %s'%self.oneletter - - def _compute_ambig_ll(self): - """extend log-likelihood matrix to include ambiguity codes - e.g. What the score of a 'S'? Here we use the max of C and G.""" - for Dll in self.ll: - for L in one2two.keys(): - Dll[L] = max(Dll[one2two[L][0]], Dll[one2two[L][1]] ) - Dll['N'] = 0.0 - Dll['B'] = 0.0 - - def compute_from_nmer(self,nmer,beta=0.001): #For reverse compatibility - """See compute_from_text. Here for reverse compatibility""" - self.compute_from_text(nmer,beta) - - def compute_from_text(self,text,beta=0.001): - """compute a matrix values from a text string of ambiguity codes. - Use Motif_from_text utility instead to build motifs on the fly.""" - prevlett = {'B':'A', 'D':'C', 'V':'T', 'H':'G'} - countmat = [] - text = re.sub('[\.\-]','N',text.upper()) - for i in range(len(text)): - D = {'A': 0, 'C': 0, 'T':0, 'G':0} - letter = text[i] - if letter in ['B', 'D', 'V', 'H']: #B == no "A", etc... - _omit = prevlett[letter] - for L in ACGT: - if L != _omit: D[L] = 0.3333 - elif one2two.has_key(letter): #Covers WSMYRK - for L in list(one2two[letter]): - D[L] = 0.5 - elif letter == 'N': - for L in D.keys(): - D[L] = self.background[L] - elif letter == '@': - for L in D.keys(): - D[L] = self.background[L]-(0.0001) - D['A'] = D['A'] + 0.0004 - else: - D[letter] = 1.0 - countmat.append(D) - self.compute_from_counts(countmat,beta) - - def new_bg(self,bg): - """change the ACGT background frequencies to those in the supplied dictionary. 
- Recompute log-likelihood, etc. with new background. - """ - counts = [] - for pos in self.logP: - D = {} - for L,lp in pos.items(): - D[L] = math.pow(2.0,lp) - counts.append(D) - self.background = bg - self.compute_from_counts(counts,0) - - def addpseudocounts(self,beta=0): - """add pseudocounts uniformly across the matrix""" - self.compute_from_counts(self.counts,beta) - - def compute_from_counts(self,countmat,beta=0): - """build a motif object from a matrix of letter counts.""" - self.counts = countmat - self.width = len(countmat) - self.bgscale = 0 - - maxcount = 0 - #Determine Biggest column - for col in countmat: - tot = pysum(col.values()) - if tot > maxcount : - maxcount = tot - - #Pad counts of remaining columns - for col in countmat: - tot = pysum(col.values()) - pad = maxcount - tot - for L in col.keys(): - col[L] = col[L] + pad * self.background.get(L,0.) - - self.nseqs = maxcount - nseqs = maxcount - - #Add pseudocounts - if beta > 0: - multfactor = {} - bgprob = self.background - pcounts= {} - for L in bgprob.keys(): - pcounts[L] = beta*bgprob[L]*nseqs - for i in range(self.width): - for L in countmat[i].keys(): - _t = (countmat[i][L] + pcounts[L]) #Add pseudo - _t = _t / (1.0 + beta) #Renomalize - countmat[i][L] = _t - - #Build Motif - self.counts = countmat - self._compute_ll() - self._compute_oneletter() - self._maxscore() - - - def _compute_bg_from_ll(self): - """compute background model from log-likelihood matrix - by noting that: pA + pT + pC + pG = 1 - and bgA + bgT + bgC + bgG = 1 - and bgA = bgT, bgC = bgG - and so bgA = 0.5 - bgC - and pA = lA * bgA, etc for T, C, G - so... 
- (lA + lT)bgA + (lC + lG)bgC = 1 - (lA + lT)bgA + (lC + lG)(0.5 - bgA) = 1 - (lA + lT - lC - lG)bgA +(lC +lG)*0.5 = 1 - bgA = {1 - 0.5(lC + lG)} / (lA + lT - lC - lG) - + Gain accuracy by taking average of bgA over all positions of PSSM - """ - - pow = math.pow - bgATtot = 0 - nocount = 0 - near0 = lambda x:(-0.01 < x and x < 0.01) - for i in range(self.width): - _D = self.ll[i] - ATtot = pow(2,_D['A']) + pow(2,_D['T']) - GCtot = pow(2,_D['C']) + pow(2,_D['G']) - if near0(_D['A']) and near0(_D['T']) and near0(_D['G']) and near0(_D['C']): - nocount = nocount + 1 - continue - if near0(ATtot-GCtot): #Kludge to deal with indeterminate case - nocount = nocount + 1 - continue - bgAT = (1.0 - 0.5*GCtot)/(ATtot - GCtot) - if (bgAT < 0.1) or (bgAT > 1.1): - nocount = nocount + 1 - continue - bgATtot = bgATtot + bgAT - if nocount == self.width: #Kludge to deal with different indeterminate case - self.background = {'A':0.25, 'C':0.25, 'G':0.25, 'T':0.25} - return - bgAT = bgATtot / (self.width - nocount) - bgGC = 0.5 - bgAT - self.background = {'A':bgAT, 'C':bgGC, 'G':bgGC, 'T':bgAT} - - def _compute_logP_from_ll(self): - """compute self's logP matrix from the self.ll (log-likelihood)""" - log = math.log - logP = [] - for i in range(self.width): - D = {} - for L in ACGT: - ''' if ll = log(p/b) then - 2^ll = p/b - and ll = log(p) - log(b) - so log(p) = ll + log(b)''' - #Pij = pow(2.0, self.ll[i][letter]) * self.background[letter] - D[L] = self.ll[i][L] + log(self.background[L])/log(2.) 
- logP.append(D) - self.logP = logP - - def _print_ll(self): - """print log-likelihood (scoring) matrix""" - print "# ", - for i in range(self.width): - print " %4d "%i, - print - for L in ['A', 'C', 'T', 'G']: - print "#%s "%L, - for i in range(self.width): - print "%8.3f "%self.ll[i][L], - print - def _print_p(self): - """print probability (frequency) matrix""" - print "# ", - for i in range(self.width): - print " %4d "%i, - print - for L in ['A', 'C', 'T', 'G']: - print "#%s "%L, - for i in range(self.width): - print "%8.3f "%math.pow(2,self.logP[i][L]), - print - def _print_counts(self): - """print count matrix""" - print "# ", - for i in range(self.width): - print " %4d "%i, - print - for L in ['A', 'C', 'T', 'G']: - print "#%s "%L, - for i in range(self.width): - print "%8.3f "%self.counts[i][L], - print - - def _maxscore(self): - """sets self.maxscore and self.minscore""" - total = 0 - lowtot= 0 - for lli in self.ll: - total = total + max(lli.values()) - lowtot= lowtot+ min(lli.values()) - self.maxscore = total - self.minscore = lowtot - - def _compute_threshold(self,z=2.0): - """for Motif objects assembled from a set of sequence, - compute a self.threshold with a z-score based on the distribution - of scores in among the original input sequences. 
- """ - scoretally = [] - for seq in self.seqs: - matches,endpoints,scores = self.scan(seq,-100) - scoretally.append(scores[0]) - ave,std = avestd(scoretally) - self.threshold = ave - z *std - #print '#%s: threshold %5.2f = %5.2f - %4.1f * %5.2f'%( - # self, self.threshold, ave, z, std) - - def bestscanseq(self,seq): - """return score,sequence of the best match to the motif in the supplied sequence""" - matches,endpoints,scores = self.scan(seq,-100) - t = zip(scores,matches) - t.sort() - bestseq = t[-1][1] - bestscore = t[-1][0] - return bestscore, bestseq - - def bestscore(self,seq): - """return the score of the best match to the motif in the supplied sequence""" - return m.bestscan(seq) - - def bestscan(self,seq): - """return the score of the best match to the motif in the supplied sequence""" - matches,endpoints,scores = self.scan(seq,-100) - if not scores: return -100 - scores.sort() - best = scores[-1] - return best - - def matchstartorient(self,seq, factor=0.7): - """returns list of (start,orientation) coordinate pairs of matches to - the motif in the supplied sequence. Factor is multiplied by m.maxscore - to get a match threshold. - """ - ans = [] - txts,endpoints,scores = self.scan(seq,factor=factor) - for txt, startstop in zip(txts,endpoints): - start, stop = startstop - rctxt = reverse_complement(txt) - orient = (self.bestscore(txt,1) >= self.bestscore(rctxt,1)) - ans.append((start,orient)) - return ans - - def scan(self, seq, threshold = '', factor=0.7): - """ - Scan the sequence. Returns three lists: matching sequences, endpoints, - and scores. The value of 'factor' is multiplied by m.maxscore to get a - match threshold if none is supplied - """ - if len(seq) < self.width: - return self._scan_smaller(seq,threshold) - else: - return self._scan(seq,threshold,factor=factor) - - def scansum(self,seq,threshold = -1000): - """ - Sum of scores over every window in the sequence. 
Returns - total, number of matches above threshold, average score, sum of exp(score) - """ - ll = self.ll - sum = 0 - width = self.width - width_r = range(width) - width_rcr = range(width-1,-1,-1) - width_ranges = zip(width_r,width_rcr) - seqcomp = seq.translate(revcompTBL) - - total = 0 - hits = 0 - etotal= 0 - for offset in range(len(seq)-width+1): - total_f = 0 - total_r = 0 - for i,ir in width_ranges: - pos = offset+i - total_f = total_f + ll[i][ seq[pos]] - total_r = total_r + ll[i][seqcomp[pos]] - total_max = max(total_f,total_r) - if total_max >= threshold: - total = total + total_max - etotal = etotal + math.exp(total_max) - hits = hits + 1 - if not hits: - ave = 0 - else: - ave = float(total)/float(hits) - return total,hits,ave,math.log(etotal) - - def score(self, seq, fwd='Y'): - """returns the score of the first w-bases of the sequence, where w is the motif width.""" - matches, endpoints, scores = self._scan(seq,threshold=-100000,forw_only=fwd) - return scores[0] - - def bestscore(self,seq, fwd=''): - """returns the score of the best matching subsequence in seq.""" - matches, endpoints, scores = self._scan(seq,threshold=-100000,forw_only=fwd) - if scores: return max(scores) - else: return -1000 - - def _scan(self, seq,threshold='',forw_only='',factor=0.7): - """internal tility function for performing sequence scans""" - ll = self.ll #Shortcut for Log-likelihood matrix - if not threshold: threshold = factor * self.maxscore - - #print '%5.3f'%(threshold/self.maxscore) - matches = [] - endpoints = [] - scores = [] - width = self.width - width_r = range(width) - width_rcr = range(width-1,-1,-1) - width_ranges = zip(width_r,width_rcr) - - seqcomp = seq.translate(revcompTBL) - - for offset in range(len(seq)-self.width+1): #Check if +/-1 needed - total_f = 0 - total_r = 0 - for i,ir in width_ranges: - pos = offset+i - total_f = total_f + ll[i ][ seq[pos]] - total_r = total_r + ll[ir][seqcomp[pos]] - - if 0 and total_f > 1: - for i,ir in width_ranges: - print 
seq[offset+i],'%6.3f'%ll[i ][ seq[offset+i] ],' ', - print '= %7.3f'%total_f - - if 0: - print "\t\t%s vs %s: F=%6.2f R=%6.2f %6.2f %4.2f"%(seq[offset:offset+self.width], - self.oneletter,total_f,total_r, - self.maxscore, - max([total_f,total_r])/self.maxscore) - if total_f > threshold and ((total_f > total_r) or forw_only): - endpoints.append( (offset,offset+self.width-1) ) - scores.append(total_f) - matches.append(seq[offset:offset+self.width]) - elif total_r > threshold: - endpoints.append( (offset,offset+self.width-1) ) - scores.append(total_r) - matches.append(seq[offset:offset+self.width]) - return matches,endpoints,scores - def _scan_smaller(self, seq, threshold=''): - """internal utility function for performing sequence scans. The sequence - is smaller than the PSSM. Are there good matches to regions of the PSSM?""" - ll = self.ll #Shortcut for Log-likelihood matrix - matches = [] - endpoints = [] - scores = [] - w = self.width - for offset in range(self.width-len(seq)+1): #Check if +/-1 needed - maximum = 0 - for i in range(len(seq)): - maximum = maximum + max(ll[i+offset].values()) - if not threshold: threshold = 0.8 * maximum - total_f = 0 - total_r = 0 - for i in range(len(seq)): - total_f = total_f + ll[i+offset ][ seq[i] ] - total_r = total_r + ll[w-(i+offset)-1][revcomp[seq[i]]] - if 0: - print "\t\t%s vs %s: F=%6.2f R=%6.2f %6.2f %4.2f"%(seq, self.oneletter[offset:offset+len(seq)], - total_f, total_r, maximum, - max([total_f,total_r])/self.maxscore) - if total_f > threshold and total_f > total_r: - endpoints.append( (offset,offset+self.width-1) ) - scores.append(total_f) - matches.append(seq[offset:offset+self.width]) - elif total_r > threshold: - endpoints.append( (offset,offset+self.width-1) ) - scores.append(total_r) - matches.append(seq[offset:offset+self.width]) - return matches,endpoints,scores - - def mask_seq(self,seq): - """return a copy of input sequence in which any regions matching m are - replaced with strings of N's """ - masked = '' - 
matches, endpoints, scores = self.scan(seq) - cursor = 0 - for start, stop in endpoints: - masked = masked + seq[cursor:start] + 'N'*self.width - cursor = stop+1 - masked = masked + seq[cursor:] - return masked - - def masked_neighborhoods(self,seq,flanksize): - """chop up the input sequence into regions surrounding matches to m. - Replace the subsequences that match the motif with N's.""" - ns = self.seq_neighborhoods(seq,flanksize) - return [self.mask_seq(n) for n in ns] - - def seq_neighborhoods(self,seq,flanksize): - """chop up the input sequence into regions surrounding matches to the - motif.""" - subseqs = [] - matches, endpoints, scores = self.scan(seq) - laststart, laststop = -1, -1 - for start, stop in endpoints: - curstart, curstop = max(0,start-flanksize), min(stop+flanksize,len(seq)) - if curstart > laststop: - if laststop != -1: - subseqs.append(seq[laststart:laststop]) - laststart, laststop = curstart, curstop - else: - laststop = curstop - if endpoints: subseqs.append(seq[laststart:laststop]) - return subseqs - - def __sub__(self,other): - pass - """Overloads the '-' operator to compute the Euclidean distance between - probability matrices motifs of equal width.""" - if type(other) != type(self): - print "computing distance of unlike pssms (types %s, %s)"%( - type(other),type(self)) - print 'First: %s'%other - print 'Self: %s'%self - sys.exit(1) - if other.width != self.width: - print "computing distance of unlike pssms (width %d != %d)"%( - other.width,self.width) - sys.exit(1) - D = 0 - FABS = math.fabs - POW = math.pow - for L in self.logP[0].keys(): - for i in range(self.width): - D = D + POW( POW(2,self.logP[i][L]) - POW(2,other.logP[i][L]), 2 ) - #D = D + FABS( POW(2,self.logP[i][L]) - POW(2,other.logP[i][L])) - #D = D + FABS(self.logP[i][L] - other.logP[i][L]) - return math.sqrt(D) - - def maskdiff(self,other): - """a different kind of motif comparison metric. 
See THEME paper for - details""" - return maskdiff(self,other) - - def maxdiff(self): - """compute maximum possible Euclidean distance to another motif. (For - normalizing?)""" - POW = math.pow - D = 0 - for i in range(self.width): - _min = 100 - _max = -100 - for L in ACGT: - val = POW(2,self.logP[i][L]) - if val > _max: - _max = val - _maxL = L - elif val < _min: - _min = val - _minL = L - for L in ACGT: - if L == _minL: - delta = 1-POW(2,self.logP[i][L]) #1-val - D = D + delta*delta - else: - D = D + POW( POW(2,self.logP[i][L]), 2) #0-val - return math.sqrt(D) - - def revcomp(self): - """return reverse complement of motif""" - return revcompmotif(self) - def trimmed(self,thresh=0.1): - """return motif with low-information flanks removed. 'thresh' is in bits.""" - for start in range(0,self.width-1): - if self.bits[start]>=thresh: break - for stop in range(self.width,1,-1): - if self.bits[stop-1]>=thresh: break - m = self[start,stop] - return m - def bestseqs(self,thresh=None): - """return all k-mers that match motif with a score >= thresh""" - if not thresh: - if self._bestseqs: - return self._bestseqs - if not thresh: thresh = 0.8 * self.maxscore - self._bestseqs = bestseqs(self,thresh) - return self._bestseqs - def emit(self,prob_min=0.0,prob_max=1.0): - """consider motif as a generative model, and have it emit a sequence""" - if not self.cumP: - for logcol in self.logP: - tups = [] - for L in ACGT: - p = math.pow(2,logcol[L]) - tups.append((p,L)) - tups.sort() - cumu = [] - tot = 0 - for p,L in tups: - tot = tot + p - cumu.append((tot,L)) - self.cumP.append(cumu) - s = [] - #u = random()+0.01 #Can make higher for more consistent motifs - for cumu in self.cumP: - u = (prob_max-prob_min)*random() + prob_min - #u = random()+0.01 #Can make higher for more consistent motifs - last = 0 - for p,L in cumu: - if last < u and u <= p: - letter = L - break - else: last = p -# print L,'%8.4f'%u,cumu - s.append(L) - #print ''.join(s) - return ''.join(s) - - - def 
random_kmer(self): - """generate one of the many k-mers that matches the motif. See m.emit() - for a more probabilistic generator""" - if not self._bestseqs: self._bestseqs = self.bestseqs() - seqs = self._bestseqs - pos = int(random() * len(seqs)) - print 'Random: ',self.oneletter,seqs[pos][1] - return seqs[pos][1] - - def __getitem__(self,tup): - pass - """ - m.__getitem__(tup) -- Overload m[a,b] to submotif. Less pythonish than [:], but more reliable - """ - if len(tup) != 2: - print "Motif[i,j] requires two arguments, not ",tup - else: - beg, end = tup[0], tup[1] - return submotif(self,beg,end) - def __getslice__(self,beg,end): - pass - """ - m.__getslice__(,beg,end) -- Overload m[a:b] to submotif. - """ - if beg >= end: - #Probably python converted negative idx. Undo - beg = beg - self.width - return submotif(self,beg,end) - def __add__(self,other): - pass - """ - m.__add__(other) -- Overload '+' for concatenating motifs - """ - return merge(self,other,0) - def __len__(self): - pass - """ - m.__len__() -- Overload len(m) to return width - """ - return self.width - def shuffledP(self): - """ - m.shuffledP() -- Generate motif in which probability matrix has been shuffled. 
- """ - return shuffledP(self) - def copy(self): - """return a 'deep' copy of the motif""" - a = Motif() - a.__dict__ = self.__dict__.copy() - return a - - def random_diff_avestd(self,iters=5000): - """see modules' random_diff_avestd""" - return random_diff_avestd(self,iters) - def bogus_kmers(self,count=200): - """Generate a faked multiple sequence alignment that will reproduce the - probability matrix.""" - - POW = math.pow - #Build p-value inspired matrix - #Make totals cummulative: - # A: 0.1 C: 0.4 T:0.2 G:0.3 - # -> A:0.0 C:0.1 T:0.5 G:0.7 0.0 - - #Take bg into account: - # We want to pick P' for each letter such that: - # P'/0.25 = P/Q - # so P' = 0.25*P/Q - - m = [] - for i in range(self.width): - _col = [] - tot = 0.0 - for L in ACGT: - _col.append( tot ) - tot = tot + POW(2,self.logP[i][L]) * 0.25 / self.background[L] - _col.append(tot) - #Renormalize - for idx in range(len(_col)): - _col[idx] = _col[idx] / _col[-1] - m.append(_col) - - for p in range(0): #Was 5 - for i in range(len(m)): - print '%6.4f '%m[i][p], - print - - seqs=[] - for seqnum in range(count+1): - f = float(seqnum)/(count+1) - s = [] - for i in range(self.width): - for j in range(4): - if (m[i][j] <= f and f < m[i][j+1]): - s.append(ACGT[j]) - break - seqs.append(''.join(s)) - - del(seqs[0]) - #for i in range(count): - # print ">%3d\n%s"%(i,seqs[i]) - - return seqs - - -def minwindowdiff(M1,M2,overlap=5,diffmethod='diff'): - #Alternate method: maskdiff, infomaskdiff - if type(M1) != type(M2): - print "Error: Attempted to compute alignment of objects that are not both Motifs" - print " types %s: %s and %s: %s"%(M1,type(M1),M2,type(M2)) - sys.exit(1) - - if M1.width <= M2.width: A = M1; Borig = M2 - else: A = M2; Borig = M1 - wA = A.width - wB = Borig.width - O = overlap - - if diffmethod == 'diff': - diff_fcn = diff - elif diffmethod == 'maskdiff': - diff_fcn = maskdiff - elif diffmethod == 'infomaskdiff': - diff_fcn = infomaskdiff - - mindiff = 1000 - #print 'minwindodebug wA ', wA, 'wB 
', wB, 'O ', O, 'wA-0', wA-O, 'wB-O', wB-O - for Astart in range(wA-O+1): - subA = A[Astart:Astart+O] - for B in [Borig, Borig.revcomp()]: - for Bstart in range(wB-O+1): - subB = B[Bstart:Bstart+O] - mindiff = min(mindiff, diff_fcn(subA,subB)) - #print 'minwindodebug ',subA, subB, diff_fcn(subA,subB) - return mindiff - - -def minaligndiff(M1,M2,overlap=5,diffmethod='diff'): - #Alternate method: maskdiff, infomaskdiff - if type(M1) != type(M2): - print "Error: Attempted to compute alignment of objects that are not both Motifs" - print " types %s: %s and %s: %s"%(M1,type(M1),M2,type(M2)) - sys.exit(1) - - if M1.width <= M2.width: - A = M1; Borig = M2 - switch = 0 - else: - A = M2; Borig = M1 - switch = 1 - wA = A.width - wB = Borig.width - O = overlap - - ''' - Here is the figure to imagine: - 012345678901234567890 wA: 6 Bstart: 6-3 = 3 - A (A) wB: 11 Bstop: 6+11-3-1= 13 - ------ %%%%%% O: 3 lastA: 6+11-3-3= 11 - ----------- - |O| B - ''' - - if diffmethod == 'diff': - diff_fcn = diff - elif diffmethod == 'maskdiff': - diff_fcn = maskdiff - elif diffmethod == 'infomaskdiff': - diff_fcn = infomaskdiff - - Bstart = wA-O - Bstop = wA+wB-O-1 - lastA = wA+wB-O-O - Dmin = 1000 - Dmins=[] - #print A - #print '%s%s'%(' '*Bstart,Borig) - for B in [Borig, Borig.revcomp()]: - for start in range(0,lastA+1): - Bpos = [] - Apos = [] - for offset in range(wA): - abs = start+offset - if abs >= Bstart and abs <= Bstop: - Apos.append(offset) - Bpos.append(abs-Bstart) - subA = A[min(Apos),max(Apos)+1] - subB = B[min(Bpos),max(Bpos)+1] - #print '%s%s\n%s%s %f'%( - # ' '*start, subA, - # ' '*start, subB, diff_fcn(subA,subB)) - if switch: _diff = diff_fcn(subB,subA) - else: _diff = diff_fcn(subA,subB) - Dmin = min(Dmin, _diff) - return Dmin - -''' -To compare 2 motifs of the same width, there are these five functions: - -m1 - m2 - Euclidean Distance (sqrt(sum_col(sum_row))) -diff(m1,m2) - psuedo-Euclidean (sum_col(sqrt(norm(sum_row)))/#col -maskdiff(m1,m2) - diff, but excluding positions 
with "N" in m2 -infomaskdiff(m1,m2)- diff, but scaling distance by normalized - information content at each position in m2. -diverge(m1,m2) - Mutual information sum[p log (p/q)] - -**Note that maskdiff, infomaskdiff, and diverge are not symmetric functions - -To compare 2 motifs of different widths, there is the function: - -minaligndiff(M1,M2,overlap=5,diffmethod='diff') - -this does a 'sliding' comparison of two motifs and reports the minimum -distance over all alignments. overlap refers to the minumum overlap -required while sliding. Below, overlap is '2'. The default is '5'. - - ------ - ----------- - -You can optionally specify the distance metric as a text string. -The default is 'diff'. - -''' - - -def diff(self,other): - """psuedo-Euclidean (sum_col(sqrt(norm(sum_row)))/#col""" - if type(other) != type(self): - print "computing distance of unlike pssms (types %s, %s)"%( - type(other),type(self)) - print 'First: %s'%other - print 'Self: %s'%self - sys.exit(1) - if other.width != self.width: - print "computing distance of unlike pssms (width %d != %d)"%( - other.width,self.width) - sys.exit(1) - POW = math.pow - Dtot = 0 - for i in range(self.width): - '''Computes distance''' - D = 0 - for L in ACGT: - D = D + POW( POW(2,self.logP[i][L]) - POW(2,other.logP[i][L]), 2 ) - Dtot = Dtot + math.sqrt(D)/math.sqrt(2.0) - return Dtot/self.width - - -def maskdiff(self,other): - """diff, but excluding positions with 'N' in m2. 
Return pseudo-Euclidean - distance, but only include columns that are not background.""" - if type(other) != type(self): - print "computing distance of unlike pssms (types %s, %s)"%( - type(other),type(self)) - print 'First: %s'%other - print 'Self: %s'%self - sys.exit(1) - if other.width != self.width: - print "computing distance of unlike pssms (width %d != %d)"%( - other.width,self.width) - sys.exit(1) - - Dtot = 0 - POW = math.pow - NEAR0= lambda x:(-0.01 < x and x < 0.01) - divisor = 0 - for i in range(self.width): - nearcount = 0 - - '''Implements mask''' - for L in ACGT: - diff = POW(2,other.logP[i][L]) - other.background[L] - if NEAR0(diff): nearcount = nearcount + 1 - if nearcount == 4: - #print 'Skipping position %d :'%i,other.logP[i] - continue - - '''Computes distance''' - divisor = divisor + 1 - D = 0 - for L in ACGT: - D = D + POW( POW(2,self.logP[i][L]) - POW(2,other.logP[i][L]), 2 ) - Dtot = Dtot + math.sqrt(D)/math.sqrt(2.0) - return Dtot/divisor - -def infomaskdiff(self,other): - """Return pseudo-Euclidean distance, but scale column distance by - information content of "other". Used by THEME""" - if type(other) != type(self): - print "computing distance of unlike pssms (types %s, %s)"%( - type(other),type(self)) - print 'First: %s'%other - print 'Self: %s'%self - sys.exit(1) - if other.width != self.width: - print "computing distance of unlike pssms (width %d != %d)"%( - other.width,self.width) - sys.exit(1) - - maxbits = math.log( 1.0/min(other.background.values()) ) / math.log(2.0) - '''or... 
alternatively''' - #print maxbits, max(other.bits) - #print other.bits - maxbits = max(other.bits) - if maxbits < 0.1: #'''There is nothing important here''' - return 1 - - Dtot = 0 - POW = math.pow - divisor = 0 - '''Computes distance''' - for i in range(self.width): - D = 0 - for L in ACGT: - D = D + POW( POW(2,self.logP[i][L]) - POW(2,other.logP[i][L]), 2 ) - col_dist = math.sqrt(D)/math.sqrt(2.0) - col_scale = other.bits[i]/maxbits - divisor = divisor + col_scale - Dtot = Dtot + col_dist*col_scale - return Dtot/divisor - -def diverge(self,other): - """Yet another distance metric""" - if type(other) != type(self): - print "computing distance of unlike pssms (types %s, %s)"%( - type(other),type(self)) - print 'First: %s'%other - print 'Self: %s'%self - sys.exit(1) - if other.width != self.width: - print "computing distance of unlike pssms (width %d != %d)"%( - other.width,self.width) - sys.exit(1) - - Dtot = 0 - POW = math.pow - LOG2 = lambda x:math.log(x)/math.log(2.0) - NEAR0= lambda x:(-0.01 < x and x < 0.01) - divisor = 0 - for i in range(self.width): - nearcount = 0 - - '''Implements mask''' - for L in ACGT: - diff = POW(2,other.logP[i][L]) - self.background[L] - if NEAR0(diff): nearcount = nearcount + 1 - if nearcount == 4: - #print 'Skipping position %d :'%i,other.logP[i] - continue - - '''Computes distance''' - divisor = divisor + 1 - D = 0 - for L in ACGT: - Pself = POW(2, self.logP[i][L]) - Pother= POW(2,other.logP[i][L]) - D = D + Pself * LOG2(Pself/Pother) - Dtot = Dtot + D - return Dtot/divisor - - - -def bestseqs(motif,thresh, seq='',score=0,depth=0,bestcomplete=None,SEQS=[]): - """This function returns a list of all sequences that a motif could - match match with a sum(log-odds) score greater than thresh.""" - if depth == 0: - SEQS = [] #Must be a python 2.1 bug. 
I shouldn't have to do this - if not bestcomplete: - M = motif - maxs = [] - for i in range(M.width): - bestj = 'A' - for j in ['C', 'G', 'T']: - if M.ll[i][j] > M.ll[i][bestj]: - bestj = j - maxs.append(M.ll[i][bestj]) - bestcomplete = [] - for i in range(M.width): - tot = 0 - for j in range(i,M.width): - tot = tot + maxs[j] - bestcomplete.append(tot) - if depth == motif.width: - if score > thresh: - SEQS.append((score,seq)) - #if len(SEQS) > 2000: - # thresh = 1000.0 # Return Early, You don't really want all these sequences, do you? - return - if depth==-1: - print '# %-10s %6.3f %6.3f %2d'%(seq, score, bestcomplete[depth], depth) - if score + bestcomplete[depth] < thresh: return - #if depth > 0 and len(SEQS) > 2000: - # return - for L in ACGT: - newseq = seq + L - newscore = score + motif.ll[depth][L] - bestseqs(motif,thresh,newseq,newscore,depth+1,bestcomplete,SEQS) - if depth == 0: - SEQS.sort() - SEQS.reverse() - return SEQS - -def seqs2fasta(seqs,fasta_file = ''): - """ - seqs2fasta(seqs,fasta_file = '') -- Dumps a Fasta formatted file of sequences, - keyed by the sequence itself:: - - >ACTTTTTGTCCCA - ACTTTTTGTCCCA - >ACTTTTGGGGCCA - ACTTTTGGGGCCA - ... - - """ - if not fasta_file: - fasta_file = tempfile.mktemp() - FH = open(fasta_file,'w') - for i in range(len(seqs)): - FH.write(">%d\n%s\n"%(i,seqs[i])) - FH.close() - return fasta_file - -def top_nmers(N,seqs,with_counts = 0,purge_Ns = ''): - """Assemble list of all nmers (kmers) with width 'N' from supplied sequences. - Option with_counts returns list of (kmer, count) tuples instead. Purge N's - ignores kmers containing N's. 
""" - Nmers = {} - revcompTBL = string.maketrans("AGCTagctnN", "TCGAtcganN") - for seq in seqs: - for i in range(len(seq)-N+1): - Nmer = seq[i:i+N] - if purge_Ns: - if Nmer.find('N') >= 0: continue - _t = list(Nmer.translate(revcompTBL)) - _t.reverse() - NmerRC = ''.join(_t) # _t used until here to revese comp seq - _t = [Nmer, NmerRC] - _t.sort() - NmerKey = _t[0] # _t used until here to get alphabetically first seq - if Nmers.has_key(NmerKey): - Nmers[NmerKey] = Nmers[NmerKey] + 1 - else: - Nmers[NmerKey] = 1 - sorted = Nmers.keys() - sorted.sort(lambda x,y,D=Nmers:cmp(D[y],D[x]) or cmp(x,y)) - #for i in range(10): - # print "# %2d %s %d"%(i,sorted[i],Nmers[sorted[i]]) - if with_counts: - return zip(sorted,map(lambda x,N=Nmers:N[x], sorted)) - else: - return sorted - -def m_matches(seqs,wmer,m): - """Returns list of all kmers among sequences that have at most - m mismatches to the supplied wmer (kmer).""" - matches = [] - width = len(wmer) - for (nmer, count) in top_nmers(width,seqs,'with counts'): - match = 0 - for i in range(width): - if nmer[i] == wmer[i]: - match = match+1 - if match >= m: - for i in range(count): - matches.append(nmer) - return matches - -def compare_seqs(s1, s2): - pass - """ - compare_seqs(s1, s2) - """ - if len(s1) > len(s2): - long = s1 - short = s2 - else: - long = s2 - short = s1 - (maxcount,max_i) = (0,0) - for i in range(len(long)-len(short)+1): - idcount_f = 0 - idcount_r = 0 - for j in range(len(short)): - if short[j] == long[i+j]: - idcount_f = idcount_f + 1 - if short[-(j+1)] == revcomp[long[i+j]]: - idcount_r = idcount_r + 1 - if (idcount_f > maxcount and idcount_f >= idcount_r): - maxcount = idcount_f - max_i = i - elif (idcount_r > maxcount): - maxcount = idcount_r - max_i = i - #print i,j,idcount_f,idcount_r,maxcount - maxfrac = float(maxcount) / len(short) - print maxfrac,maxcount,len(short) - return maxfrac,short,long[max_i:max_i+len(short)] - -def shuffle_bases(m): - """return a new motif object in which the probabilities 
are randomly - re-assigned to different letters at the same position.""" - C = [] - letts = list('ACGT') - for i in range(m.width): - D = {} - vals = m.counts[i].values() - shuffle(vals) - for i in range(4): - D[letts[i]] = vals[i] - C.append(D) - n = Motif() - #n.__dict__ = m.__dict__.copy() #May copy too much information (cached diff information, etc...) - n.compute_from_counts(C) - return n - -def random_diff_avestd(motif,iters=5000): - """Return the average & stddev distance ('diff') between a - motif and "iters" random motifs of the same width.""" - w = motif.width - vals = [] - for i in range(iters): - vals.append(motif - Random_motif(w)) - return avestd(vals) - -def random_motif(w): - """Generate a random motif of width w. Each position will have a dominant - letter with probability around 0.91.""" - C = [] - for i in range(w): - D = {} - tot = 0 - p = int(random.random() * 4) - Lup = ACGT[p] - for L in ACGT: - D[L] = 0.1 - tot = tot + 0.001 - D[Lup] = D[Lup] + 1 - for L in ACGT: - D[L] = D[L]/tot - C.append(D) - m = Motif() - m.compute_from_counts(C) - return m - -def toDict(M): - pass - ''' - toDict(M) -- Convert a 2D array to a list of dictionaries (which is how the motif object - stores information internally). Assumes M entries are in alphabetical order (ACGT) - ''' - if type(M[0]) == type(0.0): - return toDictVect(M) - else: - a = [] - for i in range(len(M)): - a.append(toDictVect(M[i])) - return a - -def toDictVect(V): - pass - """ - toDictVect(V) -- Convert a 1D vector to a dictionary of DNA letters. Assumes values - in V are in alphabetical order (ACGT). - """ - D = {} - for L,i in (('A',0), ('C',1), ('G',2), ('T',3)): - D[L]=V[i] - return D - -def submotif(self,beg,end): - """**Deprecated** Use slice functionality (m[2:4]) instead. 
- - Utility function - for extracting sub-motifs and padding motifs.""" - bg = self.background.copy() - P = [] - - #Determine if any 'zeros' should be added at begining - #because the user has specified a negative beg index - for i in range(beg,0): - P.append(bg.copy()) - - #Copy relevant content of motif - start = max(beg,0) - stop = min(end,self.width) - for i in range(start,stop): - D = {} - for L in ACGT: - D[L] = math.pow(2.,self.logP[i][L]) - P.append(D) - - #Determine if any 'zeros' should be added at the end - #because the user has specified a width too large - for i in range(self.width,end): - P.append(bg.copy()) - - #print "BEG, END", beg,end - #for i in range(beg,end): - # print i,P[i] - - #Build the Motif - M = copy.deepcopy(self) - #M = Motif(None,bg.copy()) - M.compute_from_counts(P) - M.source = self.source - return M - -def shuffledP(self): - """Construct a motif in which the letter distributions are preserved but - are reassigned to rondom positions in the motif.""" - bg = self.background.copy() - P = [] - - #Copy relevant content of motif - for i in range(0,self.width): - D = {} - _s = ACGT[:] - shuffle(_s) - for L,_L in zip(ACGT,_s): - D[L] = math.pow(2.,self.logP[i][_L]) - P.append(D) - - #Build the Motif - M = copy.deepcopy(self) - #M = Motif(None,bg.copy()) - M.compute_from_counts(P) - M.source = self.source - return M - -def revcompmotif(self): - """Construct the reverse complement of the motif. Use m.revcomp() member - function instead.""" - bg = self.background.copy() - P = [] - - for i in range(self.width): - D = {} - for L in ACGT: - D[L] = math.pow(2.,self.logP[self.width-i-1][revcomp[L]]) - P.append(D) - - #Build the Motif - M = copy.deepcopy(self) - M.compute_from_counts(P) - return M - - -def sum(motifs,weights=[]): - """Perhaps better called 'average'. Constructs a motif by averaging the - probabilities at each position of the (pre-aligned) input motifs. Optional - weights can be assigned, and must be in the same order as the motifs. 
- """ - if not weights: - weights = [1.0] * len(motifs) - tot = 0.0 - for w in weights: tot=tot+float(w) - weights = [(w/tot) for w in weights] - C = [] - for c in motifs[0].fracs: - D = {} - for L in ACGT: D[L] = 0.0 - C.append(D) - for m,w in zip(motifs,weights): - for i in range(m.width): - for L in ACGT: - C[i][L] = C[i][L] + m.fracs[i][L]*w - motif = Motif_from_counts(C,0.0,bg=motifs[0].background) - return motif.trimmed() - - -def giflogo(motif,id,title=None,scale=0.8): - """Interface to the 'weblogo/seqlogo' perl - scripts that generate colorful sequence logos - """ - return seqlogo(motif,id,title,scale,format='GIF') - - -seqlogo_formats = ('GIF','PDF','EPS','PNG') -illegal_fn_chars = '&;/ ()' -fn_trans = string.maketrans(illegal_fn_chars,'_'*len(illegal_fn_chars)) -def seqlogo(motif,motif_id,title=None,scale=0.8,img_format='GIF') : - """Interface to the'weblogo/seqlogo' perl scripts that generate colorful - sequence logos. Available formats are %s. Replaces illegal filename - characters in *id* parameter (i.e. '%s') with underscores when writing - to file. The executable *seqlogo* must be on your path. - """%(seqlogo_formats,illegal_fn_chars) - #SEQLOGO = TAMOpaths.weblogodir + 'seqlogo' - #TAMOpaths.CHECK(SEQLOGO,'','Weblogo/Seqlogo') - kmers = motif.bogus_kmers(100) - width = float(len(kmers[0]) ) - height = float(4) - m = motif - width, height = width*scale, height*scale - tmp = tempfile.mktemp() + '.fsa' - if title is None: - title = motif_id - - if img_format.upper() not in seqlogo_formats : - raise MotifToolsException('seqlogo requires one of %s'%seqlogo_formats) - - seqs2fasta(kmers,tmp) - fn = id.translate(fn_trans) - cmd = 'seqlogo -F %s -acpY -w%d -h%d -k 1 -M -f %s -o %s -t "%s" '%( - img_format.upper(), width, height, tmp, fn, title) - - call(cmd,shell=True) - return "%s.%s"%(fn,img_format.lower()) - - -def merge(A,B,overlap=0): - """**Deprecated** Use the '+' operator instead. 
- - Used for concatenating motifs into a new motif, allowing for the averaging - of overlapping bases between them. - """ - if (overlap < 0 or overlap > A.width or overlap >B.width): - print 'Cannot overlap %s with %s by %d bases'%(A.oneletter,B.oneletter,overlap) - return None - - #Build Probability matrix. Width will be A.width + B.width - overlap - w = A.width + B.width - overlap - - P = [] - #Make a copy of A's probabilities into P - for i in range(A.width): - D = {} - logP = A.logP[i] - for L in logP.keys(): - D[L] = math.pow(2,logP[L]) - P.append(D) - #Add B's first 'overlap' probabilities to last 'overlap' probabilities of P - for i in range(overlap): - logP = B.logP[i] - Pidx = len(P)-overlap+i - _tot = 0 - for L in logP.keys(): - P[Pidx][L] = (P[Pidx][L] + math.pow(2,logP[L])) / 2.0 - P[Pidx][L] = max(P[Pidx][L],math.pow(2,logP[L])) - _tot = _tot + P[Pidx][L] - for L in logP.keys(): - P[Pidx][L] = P[Pidx][L] / _tot - #Append B's remaining probabilites to P - for i in range(overlap,B.width): - D = {} - logP = B.logP[i] - for L in logP.keys(): - D[L] = math.pow(2,logP[L]) - P.append(D) - - #Build a motif - M = Motif(None,A.background.copy()) - M.source = A.source,B.source - M.compute_from_counts(P) - return M - -def avestd(vals): - """return an (average, stddev) tuple computed from the supplied list of values""" - (sum, sum2) = (0.,0.) 
- N = float(len(vals)) - for val in vals: - sum = sum + float(val) - sum2 = sum2 + float(val)*float(val) - if N == 1: - ave = sum - std = 0 - else: - ave = sum / N - std = math.sqrt( (sum2-(N*ave*ave)) / (N-1.0) ) - return ave,std - - -def load(filename): - """load a 'TAMO'-formatted motif file""" - FID = open(filename,'r') - lines = FID.readlines() - FID.close() - motifs = [] - seedD = {} - seedfile = '' - for i in range(len(lines)): - if lines[i][0:10] == 'Log-odds matrix'[0:10]: - w = len(lines[i+1].split())-1 - ll = [] - for pos in range(w): - ll.append({}) - for j in range(0,4): - toks = lines[i+j+2].split() - L = toks[0][1] - for pos in range(w): - ll[pos][L] = float(toks[pos+1]) - m = Motif_from_ll(ll) - motifs.append(m) - if lines[i][0:6] == 'Motif '[0:6]: - toks = lines[i].split() - motifs[-1].nseqs = float(re.sub('[\(\)]','',toks[3])) - motifs[-1].totalbits= float(toks[5]) - motifs[-1].MAP = float(toks[7]) - motifs[-1].seeddist = float(toks[9]) - motifs[-1].seednum = int(toks[10][0:-1]) - motifs[-1].pvalue = math.pow(10,-float(toks[12])) - - if 'ch:' in toks: - _idx = toks.index('ch:') - motifs[-1].church = math.pow(10,-float(toks[_idx+1])) - if 'Es:' in toks: - _idx = toks.index('Es:') - motifs[-1].E_site = math.pow(10,-float(toks[_idx+1])) - if 'x2:' in toks: - _idx = toks.index('x2:') - motifs[-1].E_chi2 = math.pow(10,-float(toks[_idx+1])) - if 'Eq:' in toks: - _idx = toks.index('Eq:') - motifs[-1].E_seq = math.pow(10,-float(toks[_idx+1])) - if 'mn:' in toks: - _idx = toks.index('mn:') - motifs[-1].MNCP = float(toks[_idx+1]) - if 'f:' in toks: - _idx = toks.index('f:') - motifs[-1].frac = float(toks[_idx+1]) - if 'Ra:' in toks: - _idx = toks.index('Ra:') - motifs[-1].ROC_auc = float(toks[_idx+1]) - if 'cR:' in toks: - _idx = toks.index('cR:') - motifs[-1].CRA = float(toks[_idx+1]) - if 'Cf:' in toks: - _idx = toks.index('Cf:') - motifs[-1].Cfrac = float(toks[_idx+1]) - if 'k:' in toks: - _idx = toks.index('k:') - motifs[-1].kellis = float(toks[_idx+1]) 
- - if 'b:' in toks: - _idx = toks.index('b:') - motifs[-1].numbound = int(toks[_idx+1]) - if 'nG:' in toks: - _idx = toks.index('nG:') - motifs[-1].nummotif = int(toks[_idx+1]) - if 'bn:' in toks: - _idx = toks.index('bn:') - motifs[-1].numboundmotif = int(toks[_idx+1]) - - - - if lines[i][0:10] == 'Threshold: '[0:10]: - toks = lines[i].split() - motifs[-1].threshold= float(toks[1]) - if lines[i][0:5] == 'Seed '[0:5]: - toks = lines[i].split() - id = int(toks[1][0:-1]) #'10:' -> '10' - seedD[id] = toks[2] - if lines[i][0:7] == 'Source: '[0:7]: - motifs[-1].source = lines[i][7:].strip() - if lines[i][0:6] == 'Gamma: '[0:6]: - motifs[-1].gamma = float(lines[i][6:]) - if lines[i][0:6] == 'Evalue: '[0:6]: - motifs[-1].evalue = float(lines[i][7:].strip()) - if lines[i][0:22]=='Program specific score: '[0:22]: - tempprogscore=lines[i][23:].split(":"); - - for i in range(len(tempprogscore)): - tempprogscore[i]=tempprogscore[i].strip() - - if len(tempprogscore)>1: - try: - tempprogscore[1]=float(tempprogscore[1]) - except ValueError: - tempprogscore[1]=tempprogscore[1] - motifs[-1].progscore=tempprogscore - - if lines[i][0:10] == 'fasta file:'[0:10]: - parts=lines[i].strip().split() - motifs[-1].dataset, motifs[-1].beta, motifs[-1].bgfile = \ - parts[2],float(parts[4]), parts[7] - - if lines[i][0:21]=='classification error: '[0:21]: - motifs[-1].cverror=float(lines[i][22:].strip()) - if lines[i][0:20]=='SVM match threshold: '[0:20]: - motifs[-1].match_thresh=float(lines[i][21:].strip()) - if lines[i].find('Using')>=0 and lines[i].find('as seeds')>=0: - '''#Using all (132) motifs in SLT_081503.seeds as seeds:''' - seedfile = lines[i].split()[-3] - for i in range(len(motifs)): - if seedfile: motifs[i].seedfile = seedfile - seednum = motifs[i].seednum - if seedD.has_key(seednum): - motifs[i].seedtxt = seedD[seednum] - return motifs - -def save_motifs(motifs,filename,kmer_count=20): - """Save list of motifs as a 'TAMO'-formatted motif file to the specificied file. 
- optional kmer_count specificies how many sequences to include in the printed - multiple sequence alignment that recapitulates the probability matrix.""" - try : - print_motifs(motifs,kmer_count,f=filename) - except: - print '!-- Error saving motifs to %s'%filename - raise - -def print_motif(motif,kmer_count=20,istart=0,f=None): - """Print a motif in the 'TAMO'-format. istart specificies the motif number, and - optional kmer_count specificies how many sequences to include in the printed - multiple sequence alignment that recapitulates the probability matrix. """ - print_motifs([motif],kmer_count,istart) - sys.stdout.flush() - -def print_motifs(motifs,kmer_count=20,istart=0,f=None): - """Print list of motifs as a 'TAMO'-formatted motif file to the specificied file. - Optional kmer_count specificies how many sequences to include in the printed - multiple sequence alignment that recapitulates the probability matrix. - istart specifies number from which to begin motif ids.""" - - # handle f input cases - if f is None : - f = sys.stdout - elif isinstance(f,str) : - f = open(f,'w') - - i = istart-1 - for m in motifs: - i = i + 1 - print >>f, "Log-odds matrix for Motif %3d %s"%(i,m) - m._print >>f, _ll() - #print >>f, "Probability matrix for Motif %3d %s"%(i,m) - #m._print >>f, _p() - print >>f, "Sequence Logo" - m._print >>f, _bits() - for newprop in ('gamma', 'church', 'E_site', 'E_seq', 'E_chi2', 'realpvalue', - 'kellis', 'MNCP', 'ROC_auc', 'CRA', 'Cfrac', 'frac', 'binomial'): - if not m.__dict__.has_key(newprop): #Kludge to deal w/ old shelves - m.__dict__[newprop] = None - if m.seedtxt: print >>f, "Seed: %3d %s"%(i,m.seedtxt) - if m.gamma: print >>f, "Gamma: %7.5f"%m.gamma - if m.evalue != None: print >>f, 'Evalue: %6.3e'%m.evalue - if m.progscore is not None : - printableProgscore=(m.progscore[0],str(m.progscore[1])) - print >>f, 'Program specific score: '+ ": ".join(printableProgscore) - - if m.family: print >>f, "Family: ",m.family - if m.source: print >>f, 
"Source: ",m.source - if m.dataset: print >>f, "fasta file: %s beta: %f background sequences: %s"%(m.dataset,m.beta,m.bgfile) - if m.match_thresh: print >>f, "SVM match threshold: ",m.match_thresh - if m.cverror: print >>f, "classification error: ",m.cverror - #Motif 0 NGAGGGGGNN (0) (Bits: 8.24 MAP: 6.53 D: 0.21 0) Enr: 54.000 - print >>f, "Motif %3d %-25s (Bits: %5.2f MAP: %5.2f D: %5.3f %2d) E: %6.3f"%( - i, m, m.totalbits, m.MAP, m.seeddist, m.seednum, nlog10(m.pvalue)), - if m.binomial!=None: print >>f, ' Bi: %5.2f'%nlog10(m.binomial), - if m.church != None: print >>f, ' ch: %5.2f'%nlog10(m.church), - if m.frac != None: print >>f, ' f: %5.2f'%(m.frac), - if m.E_site != None: print >>f, ' Es: %5.2f'%nlog10(m.E_site), - if m.E_seq != None: print >>f, ' Eq: %5.2f'%(nlog10(m.E_seq)), - if m.MNCP != None: print >>f, ' mn: %5.2f'%(m.MNCP), - if m.ROC_auc!= None: print >>f, ' Ra: %6.4f'%(m.ROC_auc), - if m.E_chi2 != None: - if m.E_chi2 == 0: m.E_chi2=1e-20 - print >>f, ' x2: %5.2f'%(nlog10(m.E_chi2)), - if m.CRA != None: print >>f, ' cR: %6.4f'%(m.CRA), - if m.Cfrac != None: print >>f, ' Cf: %6.4f'%(m.Cfrac), - if m.realpvalue != None: print >>f, ' P: %6.4e'%(m.realpvalue) - if m.kellis != None: print >>f, ' k: %5.2f'%(m.kellis), - try: - if m.numbound : print >>f, ' b: %3d'%(m.numbound), - if m.nummotif : print >>f, ' nG: %3d'%(m.nummotif), - if m.numboundmotif : print >>f, ' bn: %3d'%(m.numboundmotif), - except: pass - print >>f, '' - - _max = m.maxscore - m.maxscore = -100 - if kmer_count >= 0: - seqs = m.bogus_kmers(kmer_count) - else: - seqs = m.seqs - - for seq in seqs: - print >>f, seq,i,m.scan(seq)[2][0] - - m.maxscore = _max - print >>f, '*'*m.width - print >>f, "MAP Score: %f"%(m.MAP) - -def nlog10(x,min=1e-323): - """returns -log10(x) with a maximum default value of 323.""" - if x < min: x=min - try: - return math.fabs(math.log(x)/math.log(10)) - except: - return 0 - -def txt2motifs(txt,VERBOSE=1): - """Convert a text string into a list of motifs: - 
Examples: - - 'TGASTCA,GAATC' --> 2 motifs from ambiguity codes - 'results.tamo' --> All motifs in TAMO-format file - 'results.tamo:34,45' --> Motifs 34 and 45 in TAMO-format file - 'results.pickle' --> All motifs in pickle (list or dict of Motifs) - 'results.pickle%GAL4 --> 'GAL4' entry in results.pickle dictionary - 'results.pickle:34,45 -> Motifs 34 and 45 in results.pickle list - """ - motifs = [] - exists = os.path.exists - toks = txt.split(':') - if exists(toks[0]): #It's a file!! - fname = toks[0] - if fname.find('.pickle') > 0: #It's a pickle!! - return pickletxt2motifs(toks) - else: #It's a "Motif" file!! - if VERBOSE: - print "# Loading motif from %s"%fname - allmotifs = load(fname) - if len(toks) == 1: motifs = allmotifs - else: - idxs = [int(x) for x in toks[1].split(',')] - motifs = [allmotifs[x] for x in idxs] - else: #It's a text string!! - fname = 'TXT' - for t in txt.split(','): - motifs.append(Motif_from_text(t)) - for i in range(len(motifs)): motifs[i].index = i - for i in range(len(motifs)): motifs[i].file = fname - return motifs - -def pickletxt2motifs(toks): - """[Utility function] See txt2motifs documentation.""" - fname = toks[0] - print "# Loading motif pickle from %s"%fname - F = open(fname,'r') - DA = pickle.load(F) - F.close() - ans = [] - if type(DA) == type({}): - if len(toks) > 1: - keys = [x.replace('%',' ') for x in toks[1].split(',')] - for k in keys: ans.append(DA[k]) - else: - for k in DA.keys(): DA[k].key = k - ans = DA.values() - else: #Assuming DA is a list - if len(toks) > 1: - idxs = [int(x) for x in toks[1].split(',')] - ans = [DA[x] for x in idxs] - else: - ans = DA - return ans - - -def sortby(motiflist, property, REV=0): - """Sort a motif list according to a particular property""" - mtype = type(Motif()) - for m in motiflist: - if type(m) != mtype: - print "Not a Motif Object: ",m - return - try: - motiflist.sort(lambda x,y,p=property: cmp(x.__dict__[p],y.__dict__[p])) - if REV: motiflist.reverse() - except: - print 
'Could not sort list. Probably, the specificied property "%s" is not posessed by all motifs'%property - -
--- a/chipsequtil-master/src/chipsequtil/nib.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,393 +0,0 @@ -'''Functions and classes used to interface with .nib files as created by Jim -Kent's nibFrag and faToNib utilities.''' - -import glob -import math -import os -import struct -import sys -import warnings -from cStringIO import StringIO -from collections import defaultdict as dd - -from chipsequtil import reverse_complement, get_file_parts, BEDFile - - -# module fields -NOMASK,MASK,HARDMASK = range(3) - - -class NibException(Exception) : pass - - -def _nib_fd(nib) : - '''Returns filename and file descriptor for nib, detecting whether it is a \ - path or fd appropriately''' - - # check to see if nib is a file or a string - if isinstance(nib,file) : - nib_fn = nib.name - nib.seek(0) - nib_f = nib - elif isinstance(nib,str) : - nib_fn = nib - nib_f = open(nib,'rb') - else : - raise NibException('Incompatible .nib argument %s with type %s, needs to \ - be either <type \'file\'> or <type \'str\'>'%(str(nib),type(nib))) - - return nib_fn, nib_f - - -def get_nib(nib,start=0,end=-1,strand='+',mask=NOMASK,name=None,dbHeader=None,tbaHeader=None) : - '''Return a (header,sequence) tuple representing this nibFrag record''' - headers = get_nib_header_batch(nib,[(start,end,strand,name,dbHeader,tbaHeader),]) - seqs = get_nib_seq_batch(nib,[(start,end,strand)],mask) - return headers[0], seqs[0] - - -def get_nib_batch(nib,queries,mask=NOMASK) : - '''Batch interface for fetching fasta records. Returns tuple of lists - (headers,sequences)''' - headers = get_nib_header_batch(nib,queries) - seqs = get_nib_seq_batch(nib,[x[:3] for x in queries],mask=mask) - return headers, seqs - - -def get_nib_seq(nib,start=0,end=-1,strand='+',mask=NOMASK) : - '''Extract subsequence from .nib file like Jim Kent's nibFrag utility. - Default behavior is to return the entire sequence. 
- - Extract the nucleotide substring defined by the closed interval [start,end] - from the sequence found in *nib_fn*. *mask* parameter has the following - possible values: - - chipsequtil.nib.NOMASK -- masked positions are not indicated (default) - chipsequtil.nib.MASK -- masked positions are capitalized, normal bases lower case - chipsequtil.nib.NOMASK -- masked positions are replaced with Ns - ''' - return get_nib_seq_batch(nib,[(start,end,strand)],mask)[0] - - -def get_nib_header(nib_fn,start=0,end=-1,strand='+',name=None,dbHeader=None,tbaHeader=None) : - '''Method for constructing fasta headers compliant with nibFrag utility''' - headers = get_nib_header_batch(nib,[(start,end,strand,name,dbHeader,tbaHeader),]) - return headers[0] - - -def get_nib_header_batch(nib,queries) : - '''Batch method for creating nibFrag headers. *queries* is a list of at most - 6-tuples (start,end,strand,name,dbHeader,tbaHeader) representing queries as - specified by the original nibFrag utility. Only start, end, and strand - fields are required.''' - - nib_path, nib_f = _nib_fd(nib) - - nib_dir,nib_fn,nib_base,nib_ext = get_file_parts(nib_path) - nbases = validate_nib_file(nib) - headers = [] - header_tmpl = '>%(name)s%(db)s\n' - - for rec in queries : - - # set some defaults if they are not supplied - rec = list(rec) - rec.extend([None]*(6-len(rec))) - start, end, strand, name, dbHeader, tbaHeader = rec - - if end == -1 : - end = nbases - fields = {} - fields['name'] = nib_path+':%d-%d'%(start,end) if not name else name - fields['db'] = '' - - if tbaHeader : - # ignored for some reason in nibFrag when tbaHeader supplied and dbHeader is not - fields['name'] = '' if not dbHeader else fields['name'] - fields['db'] = '%s.%s:%d-%d of %d'%(tbaHeader,nib_base,start,end,nbases) - if dbHeader : - fields['db'] = ':%s.%s:%d-%d:%s:%d'%(dbHeader,nib_base,start,end,strand,nbases) - - headers.append(header_tmpl%fields) - - return headers - - -def validate_nib_file(nib) : - '''Validate .nib file 
header, returning number of bases indicated if successful. - *nib* argument is either a filename or an open file object. - ''' - - nib_fn, nib_f = _nib_fd(nib) - - # first 4 bytes are a nib file signature - #TODO - consider attempting to figure out byte order to make truly cross platform - def_sig = 0x6BE93D3A - sig = struct.unpack('=l',nib_f.read(4))[0] - if def_sig != sig : - raise NibException('Invalid nib file signature in %s, found %s, expected \ - %s, perhaps .nib file as not created on this platform?\n\nnibFrag style \ - error: %s is not not a good .nib file.'%(nib_fn,hex(sig),hex(def_sig),nib_fn)) - - # second 4 bytes are number of bases in sequence - nbases = struct.unpack('=l',nib_f.read(4))[0] - - return nbases - - -def get_nib_seq_batch(nib,queries,mask=NOMASK) : - '''Extract subsequence from .nib file like Jim Kent's nibFrag utility. - - Extract the nucleotide substrings defined by the closed intervals in *queries* - from the sequence found in *nib*. *nib* argument is either a filename or - an open file object. Entries in *queries* are 3-tuples defining (start,end,strand) - sequence coordinates. Sequences are returned in order in a list as - strings. 
*mask* parameter has the following possible values: - - chipsequtil.nib.NOMASK -- masked positions are not indicated (default) - chipsequtil.nib.MASK -- masked positions are capitalized, normal bases lower case - chipsequtil.nib.NOMASK -- masked positions are replaced with Ns - ''' - - nib_fn, nib_f = _nib_fd(nib) - - nbases = validate_nib_file(nib_f) - - # rest of file is sequence, with each nibble (4 bytes) being a base as \ - # follows (from http://genome.ucsc.edu/FAQ/FAQformat.html#format8) : - # - # 0 - T - # 1 - C - # 2 - A - # 3 - G - # 4 - N - # - # The most significant bit in a nibble is set if the base is masked - trans_nuc = 'tcagn' - - # start translating the nibbles into nucleotides - def trans_nib(nib) : - nuc = trans_nuc[nib&7] - mask_bit = nib & 8 - if mask in [MASK,HARDMASK] and mask_bit == 0 : - return nuc.upper() - if mask == HARDMASK and mask_bit == 1 : - return 'N' - return nuc - - headers = [] # stores headers - seqs = [] # stores sequences - - # sort the coords so we can walk most efficiently through the file - queries.sort() - - for start, end, strand in queries : - - if start < 0 : - raise NibException('Received negative start coordinate, this may '\ - 'indicate a region on mitochondrial DNA that '\ - 'spans reference sequence start and end. This '\ - 'utility cannot handle these cases, aborting. 
'\ - 'Requested interval: %s (%d,%d)'%(nib_fn,start,end)) - - start, end = map(int,(start,end)) - - # end == -1 means caller wants entire sequence - if end == -1 : - end = nbases - - if any([nbases < c for c in [start,end]]) : - raise NibException(('Requested slice (%(start)d,%(end)d) not compatible ' \ - 'with sequence of length %(nbases)d in %(nib_fn)s, aborting\n\nnibFrag '\ - 'style error: nib read past end of file (%(start)d %(end)d) in file: '\ - '%(nib_fn)s')%{'start':start,'end':end,'nbases':nbases,'nib_fn':nib_fn}) - - # figure out how many bytes to read through - start_byte,rem_byte = start/2,start%2 - - # calculate where we need to move to in the file from the current location - # + 8 is from the 2*4 bytes header info in the .nib format - byte_offset = start_byte-nib_f.tell() + 8 - nib_f.seek(byte_offset,1) # seek forward to the beginning byte from current location - seq_bytes,seq_rem_byte = int(math.ceil((end-start+rem_byte)/2.)),(end+1)%2 - seq_bytes = nib_f.read(seq_bytes+seq_rem_byte) - - # start translating the bytes - seq = StringIO() # we use StringIO because it is more efficient than concatenating strings - for c in seq_bytes : - c_byte = struct.unpack('=b',c)[0] - - # higher nibble - c_nib = (c_byte & (15<<4))>>4 - nuc = trans_nib(c_nib) - seq.write(nuc) - - # lower nibble - c_nib = int(c_byte) & 15 - nuc = trans_nib(c_nib) - seq.write(nuc) - - # final nucleotide sequence - seq_str = seq.getvalue() - - # if we're reading to the end, don't clip anything - if end != nbases : - # if the coordinate requested was not on a byte boundary, adjust - if rem_byte == 1 : - seq_str = seq_str[1:] - if seq_rem_byte == 1 : - seq_str = seq_str[:-1] - - # nibFrag apparently uses zero-based indexing, clip off one base - seq_str = seq_str[:-1] - seq.close() - - # adjust strand - if strand == '-' : - seq_str = reverse_complement(seq_str) - seqs.append(seq_str) - - return seqs - - -class SeqDBException(Exception): pass -class NibDBException(Exception): pass - - 
-class SeqDB(object) : - '''Base class for different kinds of sequence databases. Does nothing, - implement subclasses. Constructor rovides _db_map and db_info class members.''' - def __init__(self) : - self._db_map = {} - self.db_info = dd(dict) - - def get_seq(self,*args, **kwargs) : - raise SeqDBException('Base class SeqDB has no get_seq implementation') - - -class NibDB(SeqDB) : - '''Class providing an interface to a set of .nib files as created by faToNib - in Jim Kent's software suite. - - Sequences are identified by the basename of the .nib file without the .nib - extension, e.g. chr1.nib is identified as chr1. - - Some potentially useful information about the entries in the database is - stored in the *nib_info* dictionary. - ''' - - def __init__(self,nib_fns=[],nib_dirs=[]) : - '''*nib_fns* is a list of paths to specific .nib files desired for the - NibDB. *nib_dirs* is a list of paths to directories containing .nib - files such that every .nib file in the directories is added to the NibDB. - Explicitly passed files take precedence over those found in directories - when sequence names collide. - ''' - SeqDB.__init__(self) - - # find all *.nib files in the directories passed - if isinstance(nib_dirs,str) : # user just provided single directory - nib_dirs = [nib_dirs] - - dir_nibs = [] - for d in nib_dirs : - dir_nibs.extend(glob.glob(os.path.join(d,'*.nib'))) - - if isinstance(nib_fns,str) : - nib_fns = [nib_fns] - # for each .nib found, add to db - # if there is a collision of names, those specified in files (not dirs) - # takes precedence without warning - for fn in dir_nibs+nib_fns : - - # open the nib file - nib_path,nib_fn,nib_base,nib_ext = get_file_parts(fn) - fn, nib_f = _nib_fd(fn) - self._db_map[nib_base] = nib_f - - # store some info - self.db_info[nib_base]['path'] = fn - nbases = validate_nib_file(self._db_map[nib_base]) - self.db_info[nib_base]['nbases'] = nbases - - def __del__(self) : - '''import this - ...Explicit is better than implicit... 
- ''' - for nib_f in self._db_map.values() : - nib_f.close() - - def _get_db_map(self,name) : - '''Gets appropriate file handle for the requested name, raises NibDBException - if it cannot be found''' - try : - return self._db_map[name] - except KeyError : - raise NibDBException('Sequence name %s not found in NibDB'%name) - - def get_fasta(self,name,start=0,end=-1,strand='+',mask=NOMASK) : - '''Get the fasta record for the specified arguments, returns (header,sequence) - tuple.''' - - nib_f = self._get_db_map(name) - return get_nib(nib_f,start,end,strand,mask) - - def get_fasta_batch(self,recs,mask=NOMASK) : - '''Batch version of *get_fasta* method. *recs* is a list of lists/tuples - with (<chromo>,<start>,<end>,<strand>). Returns list of (header,sequence) - tuples in the same sequence as the input records.''' - - # gather the records for each chromosome together - chrom_recs = dd(list) - for i,r in enumerate(recs) : - chrom_recs[r[0]].append((i,r)) # recs are (index,<tuple>) - - # extract sequences - all_chrom_recs = [] - for chrom, rec_list in chrom_recs.items() : - # sorted lists make sequence extraction efficient - rec_list.sort(key=lambda x: x[1][1]) # recs are (index,<tuple>) - - # separate indexes from records, extract for this chromo - indexes, c_recs = zip(*rec_list) - - # get_nib_batch requires list of (<start>,<end>,<strand>) tuples, remove - # chromo in first position - c_recs = [r[1:] for r in c_recs] - - nib_f = self._get_db_map(chrom) - headers, seqs = get_nib_batch(nib_f,c_recs,mask) - - # return the sequences to a (index,(header,sequence)) list - all_chrom_recs.extend(zip(indexes,zip(headers,seqs))) - - # put the sequences back in the original order - all_chrom_recs.sort(key=lambda x: x[0]) # recs are (index,<tuple>) again - indexes, recs = zip(*all_chrom_recs) - - return zip(*recs) - - def get_fasta_from_bed(self,bed,mask=NOMASK) : - '''Accepts either a chipsequtil.BEDFile instance or a filename for a BED - file (used to construct a BEDFile 
instance) and returns the fasta - records for all records in order.''' - - # determine if *bed* is a filename or a BEDFile - if isinstance(bed,str) : # filename - bed = BEDFile(bed) - - # construct the records - recs = [] - for rec in bed : - if rec['chrom'].lower().startswith('track') : # track line, skip - continue - recs.append((rec['chrom'],int(rec['chromStart']),int(rec['chromEnd']),rec['strand'])) - - return self.get_fasta_batch(recs,mask) - - def get_seq(self,name,start=0,end=-1,strand='+',mask=NOMASK) : - '''Extract sequence from sequence *name*. Other arguments are passed - directly to *get_nib_seq* function.''' - - nib_f = self._get_db_map(name) - return get_nib_seq(nib_f,start,end,strand,mask)
--- a/chipsequtil-master/src/chipsequtil/plotting.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,24 +0,0 @@ -import math - -from matplotlib.pyplot import hist, plot, savefig, title, show, xticks, yticks, figure, clf - -from chipsequtil import get_gc_content - -def plot_gc_content(sequences,bins=10,fn=None) : - - # calculate all the GC contents, sort them - gc_contents = map(get_gc_content,sequences) - gc_contents.sort() - - f = figure() - points = hist(gc_contents,bins=bins) - if fn : - savefig(fn) - else : - show() - clf() - - -def plot_pos_neg_peaks(pos_peaks,neg_peaks) : - '''Plot # pos peaks/# neg peaks by p-value''' - pass
--- a/chipsequtil-master/src/chipsequtil/sampling.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,252 +0,0 @@ - -import math -import random -import re -import sys -from collections import defaultdict - -from chipsequtil import get_org_settings, get_gc_content, get_gc_content_distribution, RefGeneFile -from nib import NibDB, NibException - -def kl_divergence(p,q) : - """Return Kullback-Leibler divergence for two probability distributions - p and q. p and q should be indexable objects of the same length where - p_i corresponds to q_i. - """ - kl_sum = 0. - for p_i, q_i in zip(p,q) : - if p_i != 0 and q_i != 0 : - kl_sum += p_i * math.log(p_i/q_i) - return kl_sum - -def rejection_sample_bg(fg_dict,organism,bins=100,num_samples=None,verbose=False, - bg_match_epsilon=1e-3) : - '''Generate background sequences according to the size, distance from genes, - and GC content distributions of the supplied foreground sequences. *fg_dict* - is a dictionary of <header>:<sequence> items, where the first part of the - header must contain: - - >chrX:<start>-<end> - - *organism* is a string that will be used to call the *chipsequtil.get_org - settings* function and uses the 'genome_dir' and 'annotation_path' keys. - *bins* is the number of bins to use for representing the GC content - distribution. 
Function returns a dictionary of <header>:<sequence> items - of generated background sequences.''' - - nib_db = NibDB(nib_dirs=[get_org_settings(organism)['genome_dir']]) - tss_fn = get_org_settings(organism)['annotation_path'] - tss = defaultdict(list) - for rec in RefGeneFile(tss_fn) : - tss[rec['chrom']].append((int(rec['txStart']),int(rec['txEnd']),)) - - # for each peak find the chromosome, distance to nearest - # gene, size of peaks in bases, and GC content - num_samples = len(fg_dict) if not num_samples else num_samples - dists,sizes=[],[] - - for header,seq in fg_dict.items() : - - # chromosome first field in fasta headers from bed2seq.bedtoseq - chrom = header.split(':')[0] - - # adjust chromosomes in special cases - if re.search('random',chrom.lower()) or chrom.lower() == 'chrm' : - continue - - # start first int in second field of bed2seq.bedtoseq header - start = int(header.split(':')[1].split('-')[0]) - midpoint = start + len(seq)/2 - - # figure out which chromosome we're working on - tss_chr = tss[chrom] - - # dsts_to_genes is the distance of this peak from all the genes, find minimum - dists_to_genes = [(s[0]-midpoint) for s in tss_chr] - try : - min_dist = min(dists_to_genes,key=lambda x : abs(x)) - dists.append(min_dist) - except : - err_str = 'Warning: no genes were found for sequence with header' \ - ' %s, not using to calculate distributions.\n'%header - sys.stderr.write(err_str) - - # calculate # bases - sizes.append(len(seq)) - - # GC content distribution for the foreground sequences - gc_dist = get_gc_content_distribution(fg_dict.values(),bins=bins) - - # max_gc is # peaks w/ highest GC content - max_gc = max(gc_dist) - - # gene_starts is a list of all genes in (chromosome,gene start) tuples - gene_starts=[] - for key in tss.keys(): - chrom=key.split('chr')[-1] - for x in tss[key]: - gene_starts.append((key,x[0])) - - # encapsulated function for proposing sequences - def propose_sequence(dists, gene_starts, sizes, nib_db) : - # sample a 
random distance from the list of distances - d = random.choice(dists) - - # pick a random gene - chrom, coord = random.choice(gene_starts) - - # propose a starting point for the bg sequence - midpoint = coord-d+random.randint(-100,100) - - # propose a size for the bg sequence - size = random.choice(sizes) - start = int(midpoint-int(size/2)) - stop = int(midpoint+int(size/2)) - - #sys.stderr.write("%s:coord=%d size=%d midpoint=%d d=%d\n"%(chrom,coord,size,midpoint,d)) - # if start or stop are negative, skip and try again - if start < 0 or stop < 0 : seq = None - - # randomly choose strand - strand = '+' if random.random() > 0.5 else '-' - - # extract the proposed sequence - try : - nib_title, seq = nib_db.get_fasta(chrom,start,stop,strand) - except IOError, e : - if verbose : sys.stderr.write('IOError in NibDB, skipping: %s,%d-%d,%s\n'%(chrom,start,stop,strand)) - seq = None - except NibException, e : - if verbose : sys.stderr.write('NibDB.get_fasta error, %s\n'%e) - seq = None - - header = '%s:%d-%d'%(chrom,start,stop) - - return header, seq - - - # build gc content distribution based on seq length and - # distance from TSS foreground distributions - # keep sampling sequences until the distribution stops - # changing a lot (KL divergence < epsilon) - bg_gc_cnts = [1.]*bins - converged = False - epsilon = bg_match_epsilon - if verbose : sys.stderr.write('Building empirical background GC content distribution\n') - while not converged : - - # propose a sequence - header, seq = propose_sequence(dists,gene_starts,sizes,nib_db) - - # sometimes this happens when there is an error, just try again - if seq is None : - continue - - # determine the GC bin for this sequence - gc_content = get_gc_content(seq) - gc_bin = -1 - for i in range(bins) : - win_start = i/float(bins) - win_end = (i+1)/float(bins) - if gc_content >= win_start and gc_content < win_end : - gc_bin = i - break - - # update the gc content distribution - sum_cnts = float(sum(bg_gc_cnts)) - if sum_cnts != 0 : # 
! on first sequence - - # calculate the current distributions - last_gc_p = map(lambda x:x/sum_cnts,bg_gc_cnts) - bg_gc_cnts[gc_bin] += 1 - new_gc_p = map(lambda x:x/sum_cnts,bg_gc_cnts) - - # calculate the kl divergence between last distribution - # and current one, stopping if less than epsilon - kl_d = kl_divergence(new_gc_p,last_gc_p) - if verbose : sys.stderr.write('dist to converge: %.3g\r'%(kl_d-epsilon)) - if kl_d < epsilon : - converged = True - - else : - bg_gc_cnts[gc_bin] += 1 - - if verbose : sys.stderr.write('\ndone\n') - - # add pseudocounts to account for missing data in bg as to avoid - # inappropriate scaling in rejection sampling step - # the fg bin with the largest value that corresponds to an empty - # bg bin is used to calculate the number of pseudocounts so that - # the resulting bg bin has the same propotion of counts in it as - # the original fg bin. This is calculated as: - # - # x_{pseudo} = \frac{p_i\sum_{i=1}^{N}a_i}{1-p_iN} - # - # where p_i is the value of the max fg bin w/ zero in the bg bin - # x_{pseudo} is added to every bin - pseudocounts = 0 - for fg_i, bg_i in zip(gc_dist,bg_gc_cnts) : - if fg_i != 0 and bg_i == 0 and fg_i*len(fg_dict) > pseudocounts : - # if fg_i > 1/sum(bg_gc_cnts) this won't work, but that *shouldn't* - # ever happen - if fg_i >= 1./sum(bg_gc_cnts) : - raise Exception('There was a numeric issue in the rejection sampling routine, please try it again') - sys.stderr.write(str([fg_i,sum(bg_gc_cnts),len(bg_gc_cnts),1.*fg_i*len(bg_gc_cnts),bg_gc_cnts])+'\n') - sys.stderr.flush() - pseudocounts = (fg_i*sum(bg_gc_cnts))/(1-1.*fg_i*len(bg_gc_cnts)) - - bg_gc_cnts = map(lambda x: x+pseudocounts/sum(bg_gc_cnts),bg_gc_cnts) - bg_gc_dist = map(lambda x: x/sum(bg_gc_cnts),bg_gc_cnts) - - # last, find the multiplier that causes the background gc distribution to - # envelope the foreground gc dist - z_coeff = gc_dist[0]/bg_gc_dist[0] - for fg_i, bg_i in zip(gc_dist[1:],bg_gc_dist[1:]) : - z_coeff = max(z_coeff,fg_i/bg_i) - 
bg_gc_dist = map(lambda x: x*z_coeff,bg_gc_dist) - - # start generating bg sequences - bg_dict = {} - - bg_gcs,bg_sizes=[],[] - - # generate a bg sequence for every fg sequence - for i in range(num_samples): - if verbose : sys.stderr.write('%d/%d'%(i,num_samples)) - - # propose sequences until one is accepted - accepted_sequence = False - while not accepted_sequence: - if verbose : sys.stderr.write('.') - - # propose a sequence - header, seq = propose_sequence(dists,gene_starts,sizes,nib_db) - - # problem occured in proposing sequence, just keep going - if seq is None : continue - - # determine the GC bin for this sequence - gc_content = get_gc_content(seq) - gc_bin = -1 - for i in range(bins) : - win_start = i/float(bins) - win_end = (i+1)/float(bins) - if gc_content >= win_start and gc_content < win_end : - gc_bin = i - continue - - # pick a uniform random number such that it does not exceed - # the maximum GC content distribution over bins - # if the random number is <= the GC content for this - # proposed sequence, accept, otherwise reject - r = random.random() * bg_gc_dist[gc_bin] - if r > gc_dist[gc_bin] : - continue - else: - bg_gcs.append(x) - #bg_sizes.append(size) - accepted_sequence = True - bg_dict[header] = seq - - if verbose : sys.stderr.write('\r') - return bg_dict
--- a/chipsequtil-master/src/chipsequtil/seq.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,265 +0,0 @@ -from itertools import izip -from textwrap import wrap - -# FASTA functions and classes -def fasta_itr(f) : - '''Returns a generator that iterates through a FASTA formatted file. - *f* may be either a text or gzipped file, or a file-like python object - representing either of these. Records are returned in the order they - are found.''' - if isinstance(f,str) : - f = open(f) - - # check for magic number 1f 8b indicating gzip file, I dunno, just cuz - if f.read(2) == "\x1f\x8b" : f = gzip.GzipFile(f) - else : f.seek(0) - - curr_header, curr_seq = None, None - for r in f : - if r.startswith('>') : - if curr_header is not None : - yield (curr_header, curr_seq) - curr_header = r[1:].strip() - curr_seq = '' - else : - curr_seq += r.strip() - # return the last record - yield (curr_header,curr_seq) - -def fasta_to_dict(f) : - '''Returns a dictionary whose keys are FASTA headers and values are - sequences. *f* may be a text, gzipped file, or a file-like - python object representing either of these.''' - return dict(fasta_itr(f)) - -def write_fasta_to_file(fasta,f,linelen=None) : - '''Writes the FASTA records in *fasta* to file specified in *f*. *fasta* - may be a dictionary like that returned by *fasta_to_dict* or a *FASTAFile* - instance. *f* may be a filename or a file-like object opened with write - mode.''' - if isinstance(fasta,dict) : - fasta_itr = fasta.iteritems() - else : - fasta_itr = fasta - - if isinstance(f,str) : - f = open(str,'w') - - for header, seq in fasta_itr : - if linelen is not None : - seq = fill(seq,linelen) - f.write('>%s\n%s\n'%(header,seq)) - f.close() - - -class FASTAFile(object) : - '''A file-like object providing information and statistics about the - sequences in a FASTA formatted file. 
Efficiently iterates through a - text or gzipped FASTA file and provides sequential or random access to - the records. Instances store header and sequence data as they are read. - - >>> fasta_str = StringIO(">seq1\\nACATAGGGAT\\n>seq2\\nTTATNTAGATA\\n") - >>> fasta_f = FASTAFile(fasta_str) - >>> [r for r in fasta_f] - [('seq1', 'ACATAGGGAT'), ('seq2', 'TTATNTAGATA')] - >>> fasta_f['seq1'] - ACATAGGGAT - >>> fasta_f.headers - ['seq1', 'seq2'] - >>> fasta_f.sequences - ['ACATAGGGAT', 'TTATNTAGATA'] - - Instances have the following members: - - **headers** - list of FASTA headers in original order - - **sequences** - list of FASTA sequences in original order - - .. NOTE:: - The members **headers** and **sequences** are not available until the - the FASTA records have been iterated once. - - When indexing like `fasta_f['seq1']`, the class assumes all headers are - unique, iterating does not make this assumption. - ''' - - def __init__(self,f) : - self._f = f - self._fasta_itr = fasta_itr(f) - self.headers = [] - self.sequences = [] - self._dict = {} - - def __getitem__(self,key) : - return self._dict[key] - - def __setitem__(self,key,val) : - self._dict[key] = val - - def next(self) : - '''Returns next FASTA record in the file as (header, sequence) tuple.''' - - if self._fasta_itr is None : - self._fasta_itr = izip(self.headers,self.sequences) - - try : - header, seq = self._fasta_itr.next() - except StopIteration, e : - self._fasta_itr = None - self._f = None - raise e - - if self._f is not None : - # this means we're not done reading through the file yet - self.headers.append(header) - self.sequences.append(seq) - self._dict[header] = seq - - return header, seq - - def __iter__(self) : - return self - -# FASTQ functions and classes -def fastq_itr(f) : - '''Returns a generator that iterates through a FASTQ formatted file. - *f* may be either a text or gzipped file, or a file-like python object - representing either of these. 
Records are returned in the order they - are found.''' - if isinstance(f,str) : - f = open(f) - - # check for magic number 1f 8b indicating gzip file, I dunno, just cuz - if f.read(2) == "\x1f\x8b" : f = gzip.GzipFile(f) - else : f.seek(0) - - SEQ, QUAL = 0,1 - in_region = SEQ - curr_header, curr_seq, curr_qual = None, None, None - for r in f : - if r.startswith('@') : - if curr_header is not None : - yield (curr_header, (curr_seq, curr_qual)) - curr_header = r[1:].strip() - curr_seq = '' - curr_qual = '' - in_region = SEQ - elif r.startswith('+') : - in_region = QUAL - else : - curr_field = r.strip() - if in_region == SEQ : - curr_seq += curr_field - elif in_region == QUAL : - curr_qual += curr_field - - # return the last record - yield (curr_header,(curr_seq,curr_qual)) - -def fastq_to_dict(f) : - '''Returns a dictionary whose keys are FASTQ headers and values are - sequences. *f* may be a text, gzipped file, or a file-like - python object representing either of these.''' - return dict(fastq_itr(f)) - -def write_fastq_to_file(fastq,f,linelen=None) : - '''Writes the FASTQ records in *fasta* to file specified in *f*. *fastq* - may be a dictionary like that returned by *fastq_to_dict* or a *FASTQFile* - instance. *f* may be a filename or a file-like object opened with write - mode.''' - if isinstance(fastq,dict) : - fastq_itr = fasta.iteritems() - else : - fastq_itr = fasta - - f_out = open(str,'w') if isinstance(f,str) else f - - for header, (seq, qual) in fastq_itr : - if linelen is not None : - seq = fill(seq,linelen) - f_out.write('>%s\n%s\n'%(header,seq)) - - if isinstance(f,str) : - f_out.close() - - -class FASTQFile(object) : - '''A file-like object providing information and statistics about the - sequences in a FASTQ formatted file. Efficiently iterates through a - text or gzipped FASTQ file and provides sequential or random access to - the records. 
Instances store header and sequence data as they are read - - >>> fastq_str = StringIO("@seq1\\nACATAGGGAT\\n+seq2\\nY^_cccQYJQ\\n - @seq2\\nTTATNTAGAT\\n+seq2\\nY^_cJcQQJQ") - >>> fastq_f = FASTQFile(fastq_str) - >>> [r for r in fastq_f] - [('seq1', ('ACATAGGGAT', 'Y^_cccQYJQ')), ('seq2', ('TTATNTAGATA', 'Y^_cJcQQJQ'))] - >>> fastq_f['seq1'] - ('seq1', ('ACATAGGGAT', 'Y^_cccQYJQ')) - >>> fastq_f.headers - ['seq1', 'seq2'] - >>> fastq_f.sequences - ['ACATAGGGAT', 'TTATNTAGAT'] - >>> fastq_f.quals - ['Y^_cccQYJQ', 'Y^_cJcQQJQ'] - - Instances have the following members: - - **headers** - list of FASTQ headers in original order - - **sequences** - list of FASTQ sequences in original order - - **quals** - list of FASTQ quality scores in original order - - .. NOTE:: - The members **headers**, **sequences**, and **quals** are not available - until the the FASTQ records have been iterated once - - When indexing like `fastq_f['seq1']`, the class assumes all headers are - unique, iterating does not make this assumption. - ''' - - def __init__(self,f) : - self._f = f - self._fastq_itr = fastq_itr(f) - self.headers = [] - self.sequences = [] - self.quals = [] - self._dict = {} - - def __getitem__(self,key) : - return self._dict[key] - - def __setitem__(self,key,val) : - self._dict[key] = val - - def next(self) : - '''Returns next FASTA record in the file as (header, sequence) tuple.''' - - if self._fastq_itr is None : - self._fastq_itr = izip(self.headers,self.sequences) - - try : - header, (seq, qual) = self._fastq_itr.next() - except StopIteration, e : - self._fastq_itr = None - self._f = None - raise e - - if self._f is not None : - # this means we're not done reading through the file yet - self.headers.append(header) - self.sequences.append(seq) - self.quals.append(qual) - self._dict[header] = (seq, qual) - - return header, (seq, qual) - - def __iter__(self) : - return self -
--- a/chipsequtil-master/src/chipsequtil/util.py Mon Mar 28 11:56:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,131 +0,0 @@ -"""Utility/helper classes and functions used by the chipsequtil package. -""" - -import textwrap - -from optparse import IndentedHelpFormatter - -class MultiLineHelpFormatter(IndentedHelpFormatter) : - """An OptionParser formatter that preserves newline characters in - description and epilog fields and word-wraps all sequences of text - not interrupted by newline characters. - """ - - def _format_text(self, text) : - """Wrap paragraphs of text individually separated by - newlines (preserves explicit newline characters). - """ - text_width = self.width - self.current_indent - indent = " "*self.current_indent - output_text = [] - paragraphs = text.split('\n') - for p in paragraphs : - output_text.append(textwrap.fill(p, - text_width, - initial_indent=indent, - subsequent_indent=indent)) - return '\n'.join(output_text) - - - - -# A binary ordered tree example -# shamelessly copied from: http://code.activestate.com/recipes/286239-binary-ordered-tree/ -class CNode: - left , right, data = None, None, 0 - - def __init__(self, data): - # initializes the data members - self.left = None - self.right = None - self.data = data - - -class KeyedBinaryTree : # do this later... 
- pass - - -class CBOrdTree: - def __init__(self): - # initializes the root member - self.root = None - - def addNode(self, data): - # creates a new node and returns it - return CNode(data) - - def insert(self, root, data): - # inserts a new data - if root == None: - # it there isn't any data - # adds it and returns - return self.addNode(data) - else: - # enters into the tree - if data <= root.data: - # if the data is less than the stored one - # goes into the left-sub-tree - root.left = self.insert(root.left, data) - else: - # processes the right-sub-tree - root.right = self.insert(root.right, data) - return root - - def lookup(self, root, target): - # looks for a value into the tree - if root == None: - return 0 - else: - # if it has found it... - if target == root.data: - return 1 - else: - if target < root.data: - # left side - return self.lookup(root.left, target) - else: - # right side - return self.lookup(root.right, target) - - def minValue(self, root): - # goes down into the left - # arm and returns the last value - while(root.left != None): - root = root.left - return root.data - - def maxDepth(self, root): - if root == None: - return 0 - else: - # computes the two depths - ldepth = self.maxDepth(root.left) - rdepth = self.maxDepth(root.right) - # returns the appropriate depth - return max(ldepth, rdepth) + 1 - - def size(self, root): - if root == None: - return 0 - else: - return self.size(root.left) + 1 + self.size(root.right) - - def printTree(self, root): - # prints the tree path - if root == None: - pass - else: - self.printTree(root.left) - print root.data, - self.printTree(root.right) - - def printRevTree(self, root): - # prints the tree path in reverse - # order - if root == None: - pass - else: - self.printRevTree(root.right) - print root.data, - self.printRevTree(root.left) -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chipsequtil/map_to_known_genes.py Mon Mar 28 12:31:17 2016 -0400 @@ -0,0 +1,236 @@ +#!/usr/local/bin/python + +import sys, os +from optparse import OptionParser +from collections import defaultdict as dd +from csv import DictReader, DictWriter + +from chipsequtil import MACSFile, BEDFile, KnownGeneFile, parse_number +from chipsequtil.util import MultiLineHelpFormatter + +usage = '%prog [options] <knownGene file> <knownGene xRef file> <peaks file>' +description = """ +Map the peaks in <peaks file> to genes in <knownGene file>. <knownGene file> is\ +format is as specified in http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/knownGene.sql.\ +<peaks file> format is as produced by MACS. If *auto* is chosen (default) file extension \ +is examined for *.xls* for default MACS format or *.bed* for BED format. If the --detail \ +option is provided, the following extra fields are appended to each row: + +peak loc, dist from feature, map type, map subtype +""" +epilog = '' +parser = OptionParser(usage=usage,description=description,epilog=epilog,formatter=MultiLineHelpFormatter()) +parser.add_option('--upstream-window',dest='upst_win',type='int',default=5500,help='window width in base pairs to consider promoter region [default: %default]') +parser.add_option('--downstream-window',dest='dnst_win',type='int',default=2500,help='window width in base pairs to consider downstream region [default: %default]') +parser.add_option('--tss',dest='tss',action='store_true',help='calculate downstream window from transcription start site instead of transcription end site') +parser.add_option('--map-output',dest='peak_output',default=None,help='filename to output mapped peaks to [default: stdout]') +parser.add_option('--stats-output',dest='stats_output',default=sys.stderr,help='filename to output summary stats in conversion [default: stderr]') 
+parser.add_option('--peaks-format',dest='peaks_fmt',default='auto',type='choice',choices=['auto','MACS','BED'],help='format of peaks input file [default: %default]') +parser.add_option('--detail',dest='detail',action='store_true',help='add extra fields to output, see description') +parser.add_option('--intergenic',dest='intergenic',action='store_true',help='write intergenic peaks to the gene file as well with None as gene ID') +#parser.add_option('--symbol-xref',dest='symbol_xref',default=None,help='use the kgXref table file supplied to find a gene symbol, output as second column') + +# TODO - options +#parser.add_option('--use-cds',dest='use_cds',action='store_true',help='use cdsStart and cdsEnd fields instead of txStart and txEnd to do mapping') +#parser.add_option('--capture-intergenic'...) +#parser.add_option('--map-format',dest='peak_format',type='choice',choices=['default','BED'],help='format of peak output [default: %default]') +#parser.add_option('--stats-format',dest='stats_format',type='choice',choices=['human','python'],help='format of summary stats output [default: %default]') + +def parse_gene_ref(ref_gene) : + reader = KnownGeneFile(ref_gene) + gene_ref = dd(list) + for ref_dict in reader : + gene_ref[ref_dict['chrom']].append(ref_dict) + + return gene_ref + +def parse_gene_ref_line(l) : + l = map(parse_number, l) # coerce to numbers where possible + l[9] = map(parse_number, l[9].split(',')) # turn 'x,x,x,...' 
into list + l[10] = map(parse_number, l[10].split(',')) + return l + +if __name__ == '__main__' : + + opts, args = parser.parse_args(sys.argv[1:]) + + if len(args) < 3 : + parser.error('Must provide three filename arguments') + + gene_ref = parse_gene_ref(args[0]) + xref_fn = args[1] + peaks_fn = args[2] + if opts.peaks_fmt == 'auto' : + path,ext = os.path.splitext(peaks_fn) + if ext.lower() == '.xls' : + opts.peaks_fmt = 'MACS' + elif ext.lower() == '.bed' : + opts.peaks_fmt = 'BED' + elif ext.lower() == '.narrowpeak' : + opts.peaks_fmt = 'BED' + else : + parser.error('Could not guess peaks file format by extension (%s), aborting'%ext) + + if opts.peaks_fmt == 'MACS' : + peaks_reader_cls = MACSFile + chr_field, start_field, end_field = 'chr', 'start', 'end' + elif opts.peaks_fmt == 'BED' : + peaks_reader_cls = BEDFile + chr_field, start_field, end_field = 'chrom', 'chromStart', 'chromEnd' + else : + # should never happen + fieldnames = [] + + #peaks_reader = DictReader(open(args[1]),fieldnames=fieldnames,delimiter='\t') + peaks_reader = peaks_reader_cls(peaks_fn) + + # default output format: + if opts.peak_output : + peak_output = open(opts.peak_output,'w') + else : + peak_output = sys.stdout + + fieldnames = peaks_reader.FIELD_NAMES + if opts.detail : + fieldnames += ["peak loc","dist from feature","map type","map subtype"]#"score" + output_fields = ['knownGeneID']+fieldnames + + # see if the user wants gene symbols too + # TODO - actually make this an option, or make it required + opts.symbol_xref = xref_fn + if opts.symbol_xref : + kgXref_fieldnames = ['kgID','mRNA','spID','spDisplayID','geneSymbol','refseq','protAcc','description'] + symbol_xref_reader = DictReader(open(opts.symbol_xref),fieldnames=kgXref_fieldnames,delimiter='\t') + symbol_xref_map = {} + for rec in symbol_xref_reader : + symbol_xref_map[rec['kgID']] = rec + output_fields = ['knownGeneID','geneSymbol']+fieldnames + + peaks_writer = 
DictWriter(peak_output,output_fields,delimiter='\t',extrasaction='ignore',lineterminator='\n') + peaks_writer.writerow(dict([(k,k) for k in output_fields])) + unique_genes = set() + map_stats = dd(int) + for peak in peaks_reader : + + # if this is a comment or header line get skip it + if peak[fieldnames[0]].startswith('#') or \ + peak[fieldnames[0]] == fieldnames[0] or \ + peak[fieldnames[0]].startswith('track') : continue + + # coerce values to numeric if possible + for k,v in peak.items() : peak[k] = parse_number(v) + + # MACS output gives us summit + if opts.peaks_fmt == 'MACS' : + peak_loc = peak[start_field]+peak['summit'] + else : # peak assumed to be in the middle of the reported peak range + peak_loc = (peak[start_field]+peak[end_field])/2 + + chrom_genes = gene_ref[peak[chr_field]] + + if len(chrom_genes) == 0 : + sys.stderr.write('WARNING: peak chromosome %s not found in gene reference, skipping: %s\n'%(peak[chr_field],peak)) + continue + + mapped = False + + # walk through the genes for this chromosome + for gene in chrom_genes : + + # reusable dictionary for output + out_d = {}.fromkeys(output_fields,0) + out_d.update(peak) + out_d['map type'] = '' + out_d['chromo'] = peak[chr_field] + out_d['peak loc'] = peak_loc + + # determine intervals for promoter, gene, and downstream + if gene['strand'] == '+' : + promoter_coords = max(gene['txStart']-1-opts.upst_win,0), gene['txStart']-1 + if opts.tss : + gene_coords = gene['txStart'], min(gene['txEnd'],gene['txStart']+opts.dnst_win) + downstream_coords = gene['txEnd']+1,gene['txStart']+opts.dnst_win + else : + gene_coords = gene['txStart'], gene['txEnd'] + downstream_coords = gene['txEnd']+1, gene['txEnd']+1+opts.dnst_win + else : + promoter_coords = gene['txEnd']+1, gene['txEnd']+1+opts.upst_win # +1 because we're using 1 based indexing + if opts.tss : + gene_coords = max(gene['txStart'],gene['txEnd']-opts.upst_win), gene['txEnd'] + downstream_coords = gene['txEnd']-1-opts.dnst_win, gene['txStart']-1 # -1 
because we're using 1 based indexing + else : + gene_coords = gene['txStart'], gene['txEnd'] + downstream_coords = gene['txStart']-1-opts.dnst_win, gene['txStart']-1 # -1 because we're using 1 based indexing + + # check for promoter + if peak_loc >= promoter_coords[0] and peak_loc <= promoter_coords[1] : + out_d['map type'] = 'promoter' + out_d['dist from feature'] = peak_loc - promoter_coords[1] if gene['strand'] == '+' else promoter_coords[0] - peak_loc + + # check for gene + elif peak_loc >= gene_coords[0] and peak_loc <= gene_coords[1] : + # check for intron/exon + exon_coords = zip(gene['exonStarts'],gene['exonEnds']) + in_exon = False + for st,en in exon_coords : + if peak_loc >= st and peak_loc <= en : + in_exon = True + break + out_d['map type'] = 'gene' + out_d['map subtype'] = 'exon' if in_exon else 'intron' + + #Commented out to keep score reported in bed file - AJD 7/29/14 + # score = (peak-TSS)/(TSE-TSS) - peak distance from TSS as fraction of length of gene + #gene_len = float(gene_coords[1]-gene_coords[0]) + #out_d['score'] = (peak_loc-gene_coords[0])/gene_len if gene['strand'] == '+' else (gene_coords[1]-peak_loc)/gene_len + + # distance calculated from start of gene + out_d['dist from feature'] = peak_loc - promoter_coords[1] if gene['strand'] == '+' else promoter_coords[0] - peak_loc + + map_stats[out_d['map subtype']] += 1 + + # check for downstream + elif peak_loc >= downstream_coords[0] and peak_loc <= downstream_coords[1] : + out_d['map type'] = 'after' + if opts.tss : + out_d['dist from feature'] = peak_loc - gene_coords[0] if gene['strand'] == '+' else gene_coords[1] - peak_loc + else : + out_d['dist from feature'] = peak_loc - downstream_coords[0] if gene['strand'] == '+' else downstream_coords[1] - peak_loc + + # does not map to this gene + else : + pass + + # map type is not blank if we mapped to something + if out_d['map type'] != '' : + + #out_d = {'knownGeneID':gene['name']} + out_d['knownGeneID'] = gene['name'] + if opts.symbol_xref : 
+ out_d['geneSymbol'] = symbol_xref_map[gene['name']]['geneSymbol'] + peaks_writer.writerow(out_d) + + mapped = True + + # reset map_type + out_d['map type'] = '' + + if not mapped : + if opts.intergenic : + out_d['knownGeneID'] = 'None' + out_d['geneSymbol'] = 'None' + out_d['map type'] = 'intergenic' + peaks_writer.writerow(out_d) + map_stats['intergenic'] += 1 + + if peak_output != sys.stdout : + peak_output.close() + + #if opts.stats_output != sys.stderr : + # opts.stats_output = open(opts.stats_output,'w') + + #for k,v in map_stats.items() : + # opts.stats_output.write('%s: %s\n'%(k,v)) + + #if opts.stats_output != sys.stderr : + # opts.stats_output.close()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chipsequtil/map_to_known_genes.xml Mon Mar 28 12:31:17 2016 -0400 @@ -0,0 +1,46 @@ +<tool id="chipsequtil_maptoknowngenes" name="Map Peaks to Known Genes" version="0.1"> + <description> + Map the peaks in <peaks file> to genes in <knownGene file>. <knownGene file> isformat is as specified in http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/knownGene.sql.<peaks file> format is as produced by MACS. If *auto* is chosen (default) file extension is examined for *.xls* for default MACS format or *.bed* for BED format. If the --detail option is provided, the following extra fields are appended to each row: + peak loc, dist from feature, map type, map subtype + </description> + <parallelism method="basic"></parallelism> + <requirements> + <requirement type="package">chipsequtil</requirement> + </requirements> + <command interpreter="python"> + map_to_known_genes.py + $tss + --upstream-window=$upst_win + --downstream-window=$dnst_win + --map-output=$peaksOutput + --peaks-format=$peaks_fmt + $detail + $intergenic + $knownGeneFile $knownGeneRef $macsPeaksFile + + </command> + <inputs> + <param name="knownGeneFile" type="data" label="knownGene file" help="" optional="false" /> + <param name="knownGeneRef" type="data" label="knownGene xRef file" help="" optional="false" /> + <param name="macsPeaksFile" type="data" label="Peaks File" help="" optional="false" /> + <param name="peaksOutput" type="text" label="Output filename" help="filename to output mapped peaks to" optional="false" /> + + <param name="upst_win" type="integer" label="Upstream Window" help="Window width in base pairs to consider promoter region [default: %default]" optional="false" value="5500" /> + <param name="dnst_win" type="integer" label="Downstream Window" help="Window width in base pairs to consider downstream region [default: %default]" optional="false" value="2500" /> + + <param name="tss" checked="true" label="calculate downstream window from 
transcription start site instead of transcription end site" type="boolean" truevalue="--tss" falsevalue="" help="" /> + + <param name="peaks_fmt" type="select" label="Peaks Format" help="Format of peaks input file" optional="false"> + <option value="auto">auto</option> + <option value="MACS">MACS</option> + <option selected="true" value="BED">BED</option> + </param> + + <param name="detail" checked="false" label="Add extra fields to output" type="boolean" truevalue="--detail" falsevalue="" help="" /> + <param name="intergenic" checked="false" label="Write intergenic peaks to the gene file as well with None as gene ID" type="boolean" truevalue="--intergenic" falsevalue="" help="" /> + </inputs> + <outputs> + <data format="txt" hidden="false" name="default"/> + </outputs> + <help></help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chipsequtil/tool_dependencies.xml Mon Mar 28 12:31:17 2016 -0400 @@ -0,0 +1,15 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="chipsequtil" version="1.0"> + <install version="1.0"> + <actions> + <action type="download_by_url">https://github.com/adamlabadorf/chipsequtil/archive/master.zip</action> + <action type="shell_command">unzip chipsequtil-master.zip -d chipsequtil</action> + <action type="shell_command">cd chipsequtil</action> + <action type="shell_command">cp org_settings.cfg src/chipsequtil/</action> + <action type="shell_command">python setup.py install</action> + </actions> + </install> + <readme></readme> + </package> +</tool_dependency>