changeset 0:d33030c8e2cc draft

Uploaded
author greg
date Tue, 17 Nov 2015 14:26:08 -0500
parents
children 8159aaa7da4b
files repmatch_gff3.py repmatch_gff3.xml repmatch_gff3_macros.xml repmatch_gff3_util.py test-data/closest_s_input1.gff test-data/detail_out1.tabular test-data/histogram_out1.pdf test-data/key_out1.tabular test-data/largest_s_input1.gff test-data/orphan_out1.tabular test-data/summary_out1.gff tool_dependencies.xml
diffstat 12 files changed, 932 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repmatch_gff3.py	Tue Nov 17 14:26:08 2015 -0500
@@ -0,0 +1,51 @@
+# repmatch.py
+#
+# Replicate matching - matches paired peaks from two or more replicates
+#
+# Input: two or more gff files (simple output from cwpair2), each a list of paired peaks from a replicate
+#
+# Output: list of matched groups and list of unmatched orphans
+# Files: key.tabular (file to replicate ID), summary.gff, detail.tabular, orphans.tabular
+
+import argparse
+import repmatch_gff3_util
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--input', dest='inputs', action='append', nargs=2, help="Input datasets")
+    parser.add_argument('--method', dest='method', default='closest', help='Method of finding match')
+    parser.add_argument('--distance', dest='distance', type=int, default=50, help='Maximum distance between peaks in different replicates to allow merging')
+    parser.add_argument('--step', dest='step', type=int, default=0, help='Step size of distance for each iteration')
+    parser.add_argument('--replicates', dest='replicates', type=int, default=2, help='Minimum number of replicates that must be matched for merging to occur')
+    parser.add_argument('--low_limit', dest='low_limit', type=int, default=-1000, help='Lower limit for c-w distance filter')
+    parser.add_argument('--up_limit', dest='up_limit', type=int, default=1000, help='Upper limit for c-w distance filter')
+    parser.add_argument('--output_files', dest='output_files', default='simple', help='Restrict output dataset collections.')
+    parser.add_argument('--plot_format', dest='plot_format', default=None, help='Output format for graph')
+    parser.add_argument('--output_summary', dest='output_summary', help='Matched groups in gff format')
+    parser.add_argument('--output_orphan', dest='output_orphan', default=None, help='Orphans in tabular format')
+    parser.add_argument('--output_detail', dest='output_detail', default=None, help='Details in tabular format')
+    parser.add_argument('--output_key', dest='output_key', default=None, help='Keys in tabular format')
+    parser.add_argument('--output_histogram', dest='output_histogram', default=None, help='Histogram in plot_format')
+
+    args = parser.parse_args()
+
+    dataset_paths = []
+    hids = []
+    for (dataset_path, hid) in args.inputs:
+        dataset_paths.append(dataset_path)
+        hids.append(hid)
+    repmatch_gff3_util.process_files(dataset_paths,
+                                     hids,
+                                     args.method,
+                                     args.distance,
+                                     args.step,
+                                     args.replicates,
+                                     args.up_limit,
+                                     args.low_limit,
+                                     args.output_files,
+                                     args.plot_format,
+                                     args.output_summary,
+                                     args.output_orphan,
+                                     args.output_detail,
+                                     args.output_key,
+                                     args.output_histogram)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repmatch_gff3.xml	Tue Nov 17 14:26:08 2015 -0500
@@ -0,0 +1,118 @@
+<?xml version="1.0"?>
+<tool id="repmatch_gff3" name="RepMatch" version="@WRAPPER_VERSION@.0">
+    <description>Match paired peaks from two or more replicates</description>
+    <macros>
+        <import>repmatch_gff3_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command>
+        python $__tool_directory__/repmatch_gff3.py
+        #for $i in $input:
+             --input "${i}" "${i.hid}"
+        #end for
+        --method $method
+        --distance $distance
+        --step $step
+        --replicates $replicates
+        --low_limit $low_limit
+        --up_limit $up_limit
+        --output_files $output_files_cond.output_files
+        --output_summary "$output_summary"
+        #if str($output_files_cond.output_files) in ["all", "simple_orphan"]:
+            --output_orphan "$output_orphan"
+        #end if
+        #if str($output_files_cond.output_files) =="all":
+            --plot_format $output_files_cond.plot_format
+            --output_detail "$output_detail"
+            --output_key "$output_key"
+            --output_histogram "$output_histogram"
+        #end if
+    </command>
+    <inputs>
+        <param  name="input" type="data" format="gff" multiple="True" min="2" label="Match paired peaks on" />
+        <param name="method" type="select" label="Method of finding match">
+            <option value="closest" selected="True">Closest</option>
+            <option value="largest">Largest</option>
+            <option value="all">All</option>
+        </param>
+        <param name="distance" type="integer" value="50" min="0" label="Maximum distance between peaks in different replicates to allow merging" />
+        <param name="step" type="integer" value="0" min="0" label="Step size" help="Distance for each iteration" />
+        <param name="replicates" type="integer" value="2" min="2" label="Minimum number of replicates that must be matched for merging to occur" />
+        <param name="low_limit" type="integer" value="-1000" label="Lower limit for Crick-Watson distance filter" />
+        <param name="up_limit" type="integer" value="1000" label="Upper limit for Crick-Watson distance filter" />
+        <conditional name="output_files_cond">
+            <param name="output_files" type="select" label="Restrict output to" help="Statistics will always be generated." >
+                <option value="all" selected="True">no restrictions (output everything)</option>
+                <option value="simple">matched pairs only</option>
+                <option value="simple_orphan">matched pairs and orphans only</option>
+            </param>
+            <when value="simple" />
+            <when value="simple_orphan" />
+            <when value="all">
+                <param name="plot_format" type="select" label="Output format for graph">
+                    <option value="pdf" selected="True">Pdf</option>
+                    <option value="png">Png</option>
+                    <option value="svg">Svg</option>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output_summary" format="gff" label="Matched pairs: ${tool.name} on ${on_string}" />
+        <data name="output_orphan" format="tabular" label="Orphans: ${tool.name} on ${on_string}">
+            <filter>output_files_cond["output_files"] in ["all", "simple_orphan"]</filter>
+       </data>
+        <data name="output_detail" format="tabular" label="Details: ${tool.name} on ${on_string}">
+            <filter>output_files_cond["output_files"] == "all"</filter>
+       </data>
+        <data name="output_key" format="tabular" label="Key: ${tool.name} on ${on_string}">
+            <filter>output_files_cond["output_files"] == "all"</filter>
+       </data>
+        <data name="output_histogram" format="pdf" label="Histogram: ${tool.name} on ${on_string}">
+            <filter>output_files_cond["output_files"] == "all"</filter>
+            <actions>
+                <action type="format">
+                    <option type="from_param" name="output_files_cond.plot_format" />
+                 </action>
+           </actions>
+       </data>
+    </outputs>
+    <tests>
+        <!-- Parameters and outputs must sit inside a <test> element to form a test case. -->
+        <test>
+            <!-- Both replicates go to the single multiple="True" data parameter. -->
+            <param name="input" value="closest_s_input1.gff,largest_s_input1.gff" ftype="gff" />
+            <param name="method" value="closest" />
+            <param name="distance" value="50" />
+            <param name="step" value="0" />
+            <param name="replicates" value="2" />
+            <param name="low_limit" value="-1000" />
+            <param name="up_limit" value="1000" />
+            <param name="output_files" value="all" />
+            <!-- The expected histogram is a pdf, so request the pdf plot format. -->
+            <param name="plot_format" value="pdf" />
+            <output name="output_summary" file="summary_out1.gff" ftype="gff" />
+            <output name="output_orphan" file="orphan_out1.tabular" ftype="tabular" />
+            <!-- The detail file shipped in test-data is detail_out1.tabular. -->
+            <output name="output_detail" file="detail_out1.tabular" ftype="tabular" />
+            <output name="output_key" file="key_out1.tabular" ftype="tabular" />
+            <output name="output_histogram" file="histogram_out1.pdf" ftype="pdf" compare="sim_size" />
+        </test>
+    </tests>
+    <help>
+**What it does**
+<![CDATA[
+
+Replicate matching - matches paired peaks from two or more replicates, and produces a list of matched
+groups and optionally a list of unmatched orphans.  Additional optional outputs include the median read
+count for each input, details and a histogram.
+
+]]>
+
+**Options**
+
+* **Method** - Method to use when calling replicates.
+* **Distance** - Maximum distance between peaks in different replicates to allow merging.
+* **Step Size** - Distance for each iteration.
+* **Replicates** - Minimum number of replicates to call a peak.  Number of replicates required must be at least 2.
+* **Lower Limit** - Lower limit for the Crick-Watson distance filter.
+* **Upper Limit** - Upper limit for the Crick-Watson distance filter.
+* **Plot Format** - Output format for graph.  The options are PDF, PNG or SVG.
+    </help>
+    <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repmatch_gff3_macros.xml	Tue Nov 17 14:26:08 2015 -0500
@@ -0,0 +1,29 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<macros>
+    <token name="@WRAPPER_VERSION@">1.0</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="2.3.0">anaconda</requirement>
+        </requirements>
+    </xml>
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:"/>
+            <exit_code range=":-1"/>
+            <regex match="Error:"/>
+            <regex match="Exception:"/>
+        </stdio>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+                @unpublished{None,
+                author = {None},
+                title = {None},
+                year = {None},
+                eprint = {None},
+                url = {http://www.huck.psu.edu/content/research/independent-centers-excellence/center-for-eukaryotic-gene-regulation}
+            }</citation>
+        </citations>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repmatch_gff3_util.py	Tue Nov 17 14:26:08 2015 -0500
@@ -0,0 +1,463 @@
+import bisect
+import csv
+import os
+import shutil
+import sys
+import tempfile
+
+from matplotlib import pyplot
+
+# Graph settings
+# Axis labels used by frequency_histogram.
+Y_LABEL = 'Counts'
+X_LABEL = 'Number of matched replicates'
+# Marker edge width applied to every x/y tick line of the histogram.
+TICK_WIDTH = 3
+# Amount to shift the graph to make labels fit, [left, right, top, bottom]
+ADJUST = [0.180, 0.9, 0.9, 0.1]
+# Length of tick marks, use TICK_WIDTH for width
+# These rc calls run when the module is imported and change matplotlib's
+# defaults for every figure drawn afterwards in this process.
+pyplot.rc('xtick.major', size=10.00)
+pyplot.rc('ytick.major', size=10.00)
+pyplot.rc('lines', linewidth=4.00)
+pyplot.rc('axes', linewidth=3.00)
+pyplot.rc('font', family='Arial', size=32.0)
+
+# Image formats for the histogram; not referenced by the code visible here.
+PLOT_FORMATS = ['png', 'pdf', 'svg']
+# One matplotlib color code per plotted series, indexed by series position.
+COLORS = 'krb'
+
+
+class Replicate(object):
+
+    def __init__(self, id, dataset_path):
+        self.id = id
+        self.dataset_path = dataset_path
+        self.parse(csv.reader(open(dataset_path, 'rt'), delimiter='\t'))
+
+    def parse(self, reader):
+        self.chromosomes = {}
+        for line in reader:
+            if line[0].startswith("#") or line[0].startswith('"'):
+                continue
+            cname, junk, junk, mid, midplus, value, strand, junk, attrs = line
+            attrs = parse_gff_attrs(attrs)
+            distance = attrs['cw_distance']
+            mid = int(mid)
+            midplus = int(midplus)
+            value = float(value)
+            distance = int(distance)
+            if cname not in self.chromosomes:
+                self.chromosomes[cname] = Chromosome(cname)
+            chrom = self.chromosomes[cname]
+            chrom.add_peak(Peak(cname, mid, value, distance, self))
+        for chrom in self.chromosomes.values():
+            chrom.sort_by_index()
+
+    def filter(self, up_limit, low_limit):
+        for chrom in self.chromosomes.values():
+            chrom.filter(up_limit, low_limit)
+
+    def size(self):
+        return sum([len(c.peaks) for c in self.chromosomes.values()])
+
+
+class Chromosome(object):
+
+    def __init__(self, name):
+        self.name = name
+        self.peaks = []
+
+    def add_peak(self, peak):
+        self.peaks.append(peak)
+
+    def sort_by_index(self):
+        self.peaks.sort(key=lambda peak: peak.midpoint)
+        self.keys = make_keys(self.peaks)
+
+    def remove_peak(self, peak):
+        i = bisect.bisect_left(self.keys, peak.midpoint)
+        # If the peak was actually found
+        if i < len(self.peaks) and self.peaks[i].midpoint == peak.midpoint:
+            del self.keys[i]
+            del self.peaks[i]
+
+    def filter(self, up_limit, low_limit):
+        self.peaks = [p for p in self.peaks if low_limit <= p.distance <= up_limit]
+        self.keys = make_keys(self.peaks)
+
+
+class Peak(object):
+
+    def __init__(self, chrom, midpoint, value, distance, replicate):
+        self.chrom = chrom
+        self.value = value
+        self.midpoint = midpoint
+        self.distance = distance
+        self.replicate = replicate
+
+    def normalized_value(self, med):
+        return self.value * med / self.replicate.median
+
+
+class PeakGroup(object):
+
+    def __init__(self):
+        self.peaks = {}
+
+    def add_peak(self, repid, peak):
+        self.peaks[repid] = peak
+
+    @property
+    def chrom(self):
+        return self.peaks.values()[0].chrom
+
+    @property
+    def midpoint(self):
+        return median([peak.midpoint for peak in self.peaks.values()])
+
+    @property
+    def num_replicates(self):
+        return len(self.peaks)
+
+    @property
+    def median_distance(self):
+        return median([peak.distance for peak in self.peaks.values()])
+
+    @property
+    def value_sum(self):
+        return sum([peak.value for peak in self.peaks.values()])
+
+    def normalized_value(self, med):
+        values = []
+        for peak in self.peaks.values():
+            values.append(peak.normalized_value(med))
+        return median(values)
+
+    @property
+    def peakpeak_distance(self):
+        keys = self.peaks.keys()
+        return abs(self.peaks[keys[0]].midpoint - self.peaks[keys[1]].midpoint)
+
+
+class FrequencyDistribution(object):
+
+    def __init__(self, d=None):
+        self.dist = d or {}
+
+    def add(self, x):
+        self.dist[x] = self.dist.get(x, 0) + 1
+
+    def graph_series(self):
+        x = []
+        y = []
+        for key, val in self.dist.items():
+            x.append(key)
+            y.append(val)
+        return x, y
+
+    def mode(self):
+        return max(self.dist.items(), key=lambda data: data[1])[0]
+
+    def size(self):
+        return sum(self.dist.values())
+
+
+def stop_err(msg):
+    sys.stderr.write(msg)
+    sys.exit(1)
+
+
+def median(data):
+    """
+    Find the integer median of the data set.
+    """
+    if not data:
+        return 0
+    sdata = sorted(data)
+    if len(data) % 2 == 0:
+        return (sdata[len(data)//2] + sdata[len(data)//2-1]) / 2
+    else:
+        return sdata[len(data)//2]
+
+
+def make_keys(peaks):
+    return [data.midpoint for data in peaks]
+
+
+def get_window(chromosome, target_peaks, distance):
+    """
+    Returns a window of all peaks from a replicate within a certain distance of
+    a peak from another replicate.
+    """
+    lower = target_peaks[0].midpoint
+    upper = target_peaks[0].midpoint
+    for peak in target_peaks:
+        lower = min(lower, peak.midpoint - distance)
+        upper = max(upper, peak.midpoint + distance)
+    start_index = bisect.bisect_left(chromosome.keys, lower)
+    end_index = bisect.bisect_right(chromosome.keys, upper)
+    return (chromosome.peaks[start_index: end_index], chromosome.name)
+
+
+def match_largest(window, peak, chrum):
+    if not window:
+        return None
+    if peak.chrom != chrum:
+        return None
+    return max(window, key=lambda cpeak: cpeak.value)
+
+
+def match_closest(window, peak, chrum):
+    if not window:
+        return None
+    if peak.chrom != chrum:
+        return None
+    return min(window, key=lambda match: abs(match.midpoint - peak.midpoint))
+
+
+def frequency_histogram(freqs, dataset_path, labels=[], title=''):
+    pyplot.clf()
+    pyplot.figure(figsize=(10, 10))
+    for i, freq in enumerate(freqs):
+        xvals, yvals = freq.graph_series()
+        # Go from high to low
+        xvals.reverse()
+        pyplot.bar([x-0.4 + 0.8/len(freqs)*i for x in xvals], yvals, width=0.8/len(freqs), color=COLORS[i])
+    pyplot.xticks(range(min(xvals), max(xvals)+1), map(str, reversed(range(min(xvals), max(xvals)+1))))
+    pyplot.xlabel(X_LABEL)
+    pyplot.ylabel(Y_LABEL)
+    pyplot.subplots_adjust(left=ADJUST[0], right=ADJUST[1], top=ADJUST[2], bottom=ADJUST[3])
+    ax = pyplot.gca()
+    for l in ax.get_xticklines() + ax.get_yticklines():
+        l.set_markeredgewidth(TICK_WIDTH)
+    pyplot.savefig(dataset_path)
+
+
+# Match strategies selectable by name; process_files runs every one of them
+# when the requested method is 'all'.
+METHODS = {'closest': match_closest, 'largest': match_largest}
+
+
+def gff_attrs(d):
+    if not d:
+        return '.'
+    return ';'.join('%s=%s' % item for item in d.items())
+
+
+def parse_gff_attrs(s):
+    d = {}
+    if s == '.':
+        return d
+    for item in s.split(';'):
+        key, val = item.split('=')
+        d[key] = val
+    return d
+
+
+def gff_row(cname, start, end, score, source, type='.', strand='.', phase='.', attrs={}):
+    return (cname, source, type, start, end, score, strand, phase, gff_attrs(attrs))
+
+
+def get_temporary_plot_path(plot_format):
+    """
+    Return the path to a temporary file with a valid image format
+    file extension that can be used with bioformats.
+    """
+    tmp_dir = tempfile.mkdtemp(prefix='tmp-repmatch-')
+    fd, name = tempfile.mkstemp(suffix='.%s' % plot_format, dir=tmp_dir)
+    os.close(fd)
+    return name
+
+
+def process_files(dataset_paths, galaxy_hids, method, distance, step, replicates, up_limit, low_limit, output_files,
+                  plot_format, output_summary, output_orphan, output_detail, output_key, output_histogram):
+    output_histogram_file = output_files in ["all"] and method in ["all"]
+    if len(dataset_paths) < 2:
+        return
+    if method == 'all':
+        match_methods = METHODS.keys()
+    else:
+        match_methods = [method]
+    for match_method in match_methods:
+        statistics = perform_process(dataset_paths,
+                                     galaxy_hids,
+                                     match_method,
+                                     distance,
+                                     step,
+                                     replicates,
+                                     up_limit,
+                                     low_limit,
+                                     output_files,
+                                     plot_format,
+                                     output_summary,
+                                     output_orphan,
+                                     output_detail,
+                                     output_key,
+                                     output_histogram)
+    if output_histogram_file:
+        tmp_histogram_path = get_temporary_plot_path(plot_format)
+        frequency_histogram([stat['distribution'] for stat in [statistics]],
+                            tmp_histogram_path,
+                            METHODS.keys())
+        shutil.move(tmp_histogram_path, output_histogram)
+
+
+def perform_process(dataset_paths, galaxy_hids, method, distance, step, num_required, up_limit, low_limit, output_files,
+                    plot_format, output_summary, output_orphan, output_detail, output_key, output_histogram):
+    output_detail_file = output_files in ["all"] and output_detail is not None
+    output_key_file = output_files in ["all"] and output_key is not None
+    output_orphan_file = output_files in ["all", "simple_orphan"] and output_orphan is not None
+    output_histogram_file = output_files in ["all"] and output_histogram is not None
+    replicates = []
+    for i, dataset_path in enumerate(dataset_paths):
+        try:
+            galaxy_hid = galaxy_hids[i]
+            r = Replicate(galaxy_hid, dataset_path)
+            replicates.append(r)
+        except Exception, e:
+            stop_err('Unable to parse file "%s", exception: %s' % (dataset_path, str(e)))
+    attrs = 'd%sr%s' % (distance, num_required)
+    if up_limit != 1000:
+        attrs += 'u%d' % up_limit
+    if low_limit != -1000:
+        attrs += 'l%d' % low_limit
+    if step != 0:
+        attrs += 's%d' % step
+
+    def td_writer(file_path):
+        # Returns a tab-delimited writer for a certain output
+        return csv.writer(open(file_path, 'wt'), delimiter='\t')
+
+    labels = ('chrom',
+              'median midpoint',
+              'median midpoint+1',
+              'median normalized reads',
+              'replicates',
+              'median c-w distance',
+              'reads sum')
+    for replicate in replicates:
+        labels += ('chrom',
+                   'median midpoint',
+                   'median midpoint+1',
+                   'c-w sum',
+                   'c-w distance',
+                   'replicate id')
+    summary_output = td_writer(output_summary)
+    if output_key_file:
+        key_output = td_writer(output_key)
+        key_output.writerow(('data', 'median read count'))
+    if output_detail_file:
+        detail_output = td_writer(output_detail)
+        detail_output.writerow(labels)
+    if output_orphan_file:
+        orphan_output = td_writer(output_orphan)
+        orphan_output.writerow(('chrom', 'midpoint', 'midpoint+1', 'c-w sum', 'c-w distance', 'replicate id'))
+    # Perform filtering
+    if up_limit < 1000 or low_limit > -1000:
+        for replicate in replicates:
+            replicate.filter(up_limit, low_limit)
+    # Actually merge the peaks
+    peak_groups = []
+    orphans = []
+    freq = FrequencyDistribution()
+
+    def do_match(reps, distance):
+        # Copy list because we will mutate it, but keep replicate references.
+        reps = reps[:]
+        while len(reps) > 1:
+            # Iterate over each replicate as "main"
+            main = reps[0]
+            reps.remove(main)
+            for chromosome in main.chromosomes.values():
+                peaks_by_value = chromosome.peaks[:]
+                # Sort main replicate by value
+                peaks_by_value.sort(key=lambda peak: -peak.value)
+
+                def search_for_matches(group):
+                    # Here we use multiple passes, expanding the window to be
+                    #  +- distance from any previously matched peak.
+                    while True:
+                        new_match = False
+                        for replicate in reps:
+                            if replicate.id in group.peaks:
+                                # Stop if match already found for this replicate
+                                continue
+                            try:
+                                # Lines changed to remove a major bug by Rohit Reja.
+                                window, chrum = get_window(replicate.chromosomes[chromosome.name],
+                                                           group.peaks.values(),
+                                                           distance)
+                                match = METHODS[method](window, peak, chrum)
+                            except KeyError:
+                                continue
+                            if match:
+                                group.add_peak(replicate.id, match)
+                                new_match = True
+                        if not new_match:
+                            break
+                # Attempt to enlarge existing peak groups
+                for group in peak_groups:
+                    old_peaks = group.peaks.values()[:]
+                    search_for_matches(group)
+                    for peak in group.peaks.values():
+                        if peak not in old_peaks:
+                            peak.replicate.chromosomes[chromosome.name].remove_peak(peak)
+                # Attempt to find new peaks groups.  For each peak in the
+                # main replicate, search for matches in the other replicates
+                for peak in peaks_by_value:
+                    matches = PeakGroup()
+                    matches.add_peak(main.id, peak)
+                    search_for_matches(matches)
+                    # Were enough replicates matched?
+                    if matches.num_replicates >= num_required:
+                        for peak in matches.peaks.values():
+                            peak.replicate.chromosomes[chromosome.name].remove_peak(peak)
+                        peak_groups.append(matches)
+    # Zero or less = no stepping
+    if step <= 0:
+        do_match(replicates, distance)
+    else:
+        for d in range(0, distance, step):
+            do_match(replicates, d)
+    for group in peak_groups:
+        freq.add(group.num_replicates)
+    # Collect together the remaining orphans
+    for replicate in replicates:
+        for chromosome in replicate.chromosomes.values():
+            for peak in chromosome.peaks:
+                freq.add(1)
+                orphans.append(peak)
+    # Average the orphan count in the graph by # replicates
+    med = median([peak.value for group in peak_groups for peak in group.peaks.values()])
+    for replicate in replicates:
+        replicate.median = median([peak.value for group in peak_groups for peak in group.peaks.values() if peak.replicate == replicate])
+        key_output.writerow((replicate.id, replicate.median))
+    for group in peak_groups:
+        # Output summary (matched pairs).
+        summary_output.writerow(gff_row(cname=group.chrom,
+                                        start=group.midpoint,
+                                        end=group.midpoint+1,
+                                        source='repmatch',
+                                        score=group.normalized_value(med),
+                                        attrs={'median_distance': group.median_distance,
+                                               'replicates': group.num_replicates,
+                                               'value_sum': group.value_sum}))
+        if output_detail_file:
+            summary = (group.chrom,
+                       group.midpoint,
+                       group.midpoint+1,
+                       group.normalized_value(med),
+                       group.num_replicates,
+                       group.median_distance,
+                       group.value_sum)
+            for peak in group.peaks.values():
+                summary += (peak.chrom, peak.midpoint, peak.midpoint+1, peak.value, peak.distance, peak.replicate.id)
+            detail_output.writerow(summary)
+    if output_orphan_file:
+        for orphan in orphans:
+            orphan_output.writerow((orphan.chrom,
+                                    orphan.midpoint,
+                                    orphan.midpoint+1,
+                                    orphan.value,
+                                    orphan.distance,
+                                    orphan.replicate.id))
+    if output_histogram_file:
+        tmp_histogram_path = get_temporary_plot_path(plot_format)
+        frequency_histogram([freq], tmp_histogram_path)
+        shutil.move(tmp_histogram_path, output_histogram)
+    return {'distribution': freq}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/closest_s_input1.gff	Tue Nov 17 14:26:08 2015 -0500
@@ -0,0 +1,66 @@
+chr1	cwpair	.	59	60	2881.0	.	.	cw_distance=2
+chr1	cwpair	.	123	124	4204.0	.	.	cw_distance=52
+chr1	cwpair	.	156	157	2177.0	.	.	cw_distance=59
+chr1	cwpair	.	218	219	4022.0	.	.	cw_distance=14
+chr1	cwpair	.	265	266	2474.0	.	.	cw_distance=48
+chr1	cwpair	.	268	269	4088.0	.	.	cw_distance=6
+chr1	cwpair	.	325	326	1171.0	.	.	cw_distance=16
+chr1	cwpair	.	370	371	899.0	.	.	cw_distance=25
+chr1	cwpair	.	388	389	359.0	.	.	cw_distance=20
+chr1	cwpair	.	452	453	504.0	.	.	cw_distance=8
+chr1	cwpair	.	500	501	569.0	.	.	cw_distance=-44
+chr1	cwpair	.	668	669	319.0	.	.	cw_distance=-48
+chr1	cwpair	.	6218	6219	2125.0	.	.	cw_distance=91
+chr1	cwpair	.	6454	6455	1249.0	.	.	cw_distance=63
+chr1	cwpair	.	6714	6715	433.0	.	.	cw_distance=-4
+chr1	cwpair	.	19213	19214	778.0	.	.	cw_distance=-25
+chr1	cwpair	.	22580	22581	863.0	.	.	cw_distance=-2
+chr1	cwpair	.	25305	25306	1183.0	.	.	cw_distance=99
+chr1	cwpair	.	31670	31671	490.0	.	.	cw_distance=66
+chr1	cwpair	.	32483	32484	478.0	.	.	cw_distance=48
+chr1	cwpair	.	39076	39077	1350.0	.	.	cw_distance=-29
+chr1	cwpair	.	39237	39238	362.0	.	.	cw_distance=61
+chr1	cwpair	.	45670	45671	493.0	.	.	cw_distance=-35
+chr1	cwpair	.	55548	55549	956.0	.	.	cw_distance=86
+chr1	cwpair	.	59228	59229	565.0	.	.	cw_distance=56
+chr1	cwpair	.	65160	65161	618.0	.	.	cw_distance=-4
+chr1	cwpair	.	70792	70793	2146.0	.	.	cw_distance=12
+chr1	cwpair	.	72731	72732	710.0	.	.	cw_distance=100
+chr1	cwpair	.	72805	72806	869.0	.	.	cw_distance=29
+chr1	cwpair	.	86982	86983	2013.0	.	.	cw_distance=37
+chr1	cwpair	.	87044	87045	1191.0	.	.	cw_distance=30
+chr1	cwpair	.	87109	87110	2259.0	.	.	cw_distance=3
+chr1	cwpair	.	87162	87163	5531.0	.	.	cw_distance=11
+chr1	cwpair	.	87194	87195	3643.0	.	.	cw_distance=27
+chr1	cwpair	.	92421	92422	1388.0	.	.	cw_distance=0
+chr1	cwpair	.	92567	92568	789.0	.	.	cw_distance=28
+chr1	cwpair	.	92645	92646	2397.0	.	.	cw_distance=8
+chr1	cwpair	.	95955	95956	689.0	.	.	cw_distance=51
+chr1	cwpair	.	96919	96920	12.0	.	.	cw_distance=3
+chr1	cwpair	.	98551	98552	122.0	.	.	cw_distance=27
+chr1	cwpair	.	101399	101400	2361.0	.	.	cw_distance=-44
+chr1	cwpair	.	106047	106048	572.0	.	.	cw_distance=7
+chr1	cwpair	.	108611	108612	573.0	.	.	cw_distance=-45
+chr1	cwpair	.	113782	113783	716.0	.	.	cw_distance=-20
+chr1	cwpair	.	116649	116650	773.0	.	.	cw_distance=-41
+chr1	cwpair	.	124306	124307	761.0	.	.	cw_distance=-43
+chr1	cwpair	.	134230	134231	659.0	.	.	cw_distance=100
+chr1	cwpair	.	136369	136370	365.0	.	.	cw_distance=-14
+chr1	cwpair	.	138876	138877	711.0	.	.	cw_distance=-4
+chr1	cwpair	.	139230	139231	1179.0	.	.	cw_distance=15
+chr1	cwpair	.	151365	151366	595.0	.	.	cw_distance=-28
+chr1	cwpair	.	155079	155080	1573.0	.	.	cw_distance=83
+chr1	cwpair	.	169095	169096	1887.0	.	.	cw_distance=-43
+chr1	cwpair	.	170134	170135	657.0	.	.	cw_distance=10
+chr1	cwpair	.	173276	173277	546.0	.	.	cw_distance=8
+chr1	cwpair	.	180331	180332	97.0	.	.	cw_distance=82
+chr1	cwpair	.	185109	185110	1371.0	.	.	cw_distance=46
+chr1	cwpair	.	197535	197536	5.0	.	.	cw_distance=73
+chr1	cwpair	.	199413	199414	810.0	.	.	cw_distance=-30
+chr1	cwpair	.	203863	203864	1476.0	.	.	cw_distance=-37
+chr1	cwpair	.	228672	228673	626.0	.	.	cw_distance=58
+chr1	cwpair	.	229759	229760	4531.0	.	.	cw_distance=16
+chr1	cwpair	.	229762	229763	699.0	.	.	cw_distance=63
+chr1	cwpair	.	230125	230126	44.0	.	.	cw_distance=10
+chr1	cwpair	.	230157	230158	15.0	.	.	cw_distance=5
+chr1	cwpair	.	230178	230179	56.0	.	.	cw_distance=10
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/detail_out1.tabular	Tue Nov 17 14:26:08 2015 -0500
@@ -0,0 +1,65 @@
+chrom	median midpoint	median midpoint+1	median normalized reads	replicates	median c-w distance	reads sum	chrom	median midpoint	median midpoint+1	c-w sum	c-w distance	replicate id	chrom	median midpoint	median midpoint+1	c-w sum	c-w distance	replicate id
+chr1	87168	87169	4488.704113924051	2	-1	9006.0	chr1	87162	87163	5531.0	11	1	chr1	87174	87175	3475.0	-13	2
+chr1	229759	229760	4512.3598101265825	2	16	9062.0	chr1	229759	229760	4531.0	16	1	chr1	229759	229760	4531.0	16	2
+chr1	123	124	4186.70506329114	2	52	8408.0	chr1	123	124	4204.0	52	1	chr1	123	124	4204.0	52	2
+chr1	262	263	3246.0278481012656	2	18	6512.0	chr1	268	269	4088.0	6	1	chr1	256	257	2424.0	30	2
+chr1	231	232	4699.198417721519	2	-13	9443.0	chr1	218	219	4022.0	14	1	chr1	245	246	5421.0	-40	2
+chr1	87188	87189	4647.554746835443	2	39	9342.0	chr1	87194	87195	3643.0	27	1	chr1	87182	87183	5699.0	51	2
+chr1	59	60	2869.1477848101267	2	2	5762.0	chr1	59	60	2881.0	2	1	chr1	59	60	2881.0	2	2
+chr1	257	258	2595.2319620253165	2	63	5213.0	chr1	265	266	2474.0	48	1	chr1	250	251	2739.0	78	2
+chr1	92651	92652	1420.1610759493672	2	20	2844.0	chr1	92645	92646	2397.0	8	1	chr1	92657	92658	447.0	33	2
+chr1	101399	101400	2351.2870253164556	2	-44	4722.0	chr1	101399	101400	2361.0	-44	1	chr1	101399	101400	2361.0	-44	2
+chr1	87109	87110	2249.7066455696204	2	3	4518.0	chr1	87109	87110	2259.0	3	1	chr1	87109	87110	2259.0	3	2
+chr1	156	157	2168.043987341772	2	59	4354.0	chr1	156	157	2177.0	59	1	chr1	156	157	2177.0	59	2
+chr1	70792	70793	2137.171518987342	2	12	4292.0	chr1	70792	70793	2146.0	12	1	chr1	70792	70793	2146.0	12	2
+chr1	6218	6219	2116.257911392405	2	91	4250.0	chr1	6218	6219	2125.0	91	1	chr1	6218	6219	2125.0	91	2
+chr1	86996	86997	2181.75	2	66	4383.0	chr1	86982	86983	2013.0	37	1	chr1	87011	87012	2370.0	95	2
+chr1	169095	169096	1879.2370253164559	2	-43	3774.0	chr1	169095	169096	1887.0	-43	1	chr1	169095	169096	1887.0	-43	2
+chr1	155079	155080	1566.5287974683545	2	83	3146.0	chr1	155079	155080	1573.0	83	1	chr1	155079	155080	1573.0	83	2
+chr1	203863	203864	1469.9278481012657	2	-37	2952.0	chr1	203863	203864	1476.0	-37	1	chr1	203863	203864	1476.0	-37	2
+chr1	92421	92422	1382.2898734177215	2	0	2776.0	chr1	92421	92422	1388.0	0	1	chr1	92421	92422	1388.0	0	2
+chr1	185109	185110	1365.3598101265823	2	46	2742.0	chr1	185109	185110	1371.0	46	1	chr1	185109	185110	1371.0	46	2
+chr1	39076	39077	1344.4462025316457	2	-29	2700.0	chr1	39076	39077	1350.0	-29	1	chr1	39076	39077	1350.0	-29	2
+chr1	6454	6455	1243.8617088607593	2	63	2498.0	chr1	6454	6455	1249.0	63	1	chr1	6454	6455	1249.0	63	2
+chr1	87029	87030	1009.0689873417721	2	1	2025.0	chr1	87044	87045	1191.0	30	1	chr1	87015	87016	834.0	-28	2
+chr1	25305	25306	1178.1332278481013	2	99	2366.0	chr1	25305	25306	1183.0	99	1	chr1	25305	25306	1183.0	99	2
+chr1	139230	139231	1174.1496835443038	2	15	2358.0	chr1	139230	139231	1179.0	15	1	chr1	139230	139231	1179.0	15	2
+chr1	335	336	1173.125	2	-5	2356.0	chr1	325	326	1171.0	16	1	chr1	345	346	1185.0	-25	2
+chr1	55548	55549	952.067088607595	2	86	1912.0	chr1	55548	55549	956.0	86	1	chr1	55548	55549	956.0	86	2
+chr1	360	361	888.3591772151899	2	45	1784.0	chr1	370	371	899.0	25	1	chr1	350	351	885.0	66	2
+chr1	72795	72796	961.6268987341772	2	9	1932.0	chr1	72805	72806	869.0	29	1	chr1	72786	72787	1063.0	-10	2
+chr1	22580	22581	859.4496835443038	2	-2	1726.0	chr1	22580	22581	863.0	-2	1	chr1	22580	22581	863.0	-2	2
+chr1	199413	199414	806.6677215189873	2	-30	1620.0	chr1	199413	199414	810.0	-30	1	chr1	199413	199414	810.0	-30	2
+chr1	92584	92585	1800.832911392405	2	62	3625.0	chr1	92567	92568	789.0	28	1	chr1	92601	92602	2836.0	96	2
+chr1	19213	19214	774.7993670886076	2	-25	1556.0	chr1	19213	19214	778.0	-25	1	chr1	19213	19214	778.0	-25	2
+chr1	116649	116650	769.8199367088607	2	-41	1546.0	chr1	116649	116650	773.0	-41	1	chr1	116649	116650	773.0	-41	2
+chr1	124306	124307	757.8693037974683	2	-43	1522.0	chr1	124306	124307	761.0	-43	1	chr1	124306	124307	761.0	-43	2
+chr1	113782	113783	713.0544303797469	2	-20	1432.0	chr1	113782	113783	716.0	-20	1	chr1	113782	113783	716.0	-20	2
+chr1	138876	138877	708.075	2	-4	1422.0	chr1	138876	138877	711.0	-4	1	chr1	138876	138877	711.0	-4	2
+chr1	229762	229763	696.1243670886076	2	63	1398.0	chr1	229762	229763	699.0	63	1	chr1	229762	229763	699.0	63	2
+chr1	95955	95956	686.1655063291139	2	51	1378.0	chr1	95955	95956	689.0	51	1	chr1	95955	95956	689.0	51	2
+chr1	134230	134231	656.2889240506329	2	100	1318.0	chr1	134230	134231	659.0	100	1	chr1	134230	134231	659.0	100	2
+chr1	170134	170135	654.2971518987342	2	10	1314.0	chr1	170134	170135	657.0	10	1	chr1	170134	170135	657.0	10	2
+chr1	228672	228673	623.4246835443038	2	58	1252.0	chr1	228672	228673	626.0	58	1	chr1	228672	228673	626.0	58	2
+chr1	65160	65161	615.4575949367088	2	-4	1236.0	chr1	65160	65161	618.0	-4	1	chr1	65160	65161	618.0	-4	2
+chr1	151365	151366	592.5522151898734	2	-28	1190.0	chr1	151365	151366	595.0	-28	1	chr1	151365	151366	595.0	-28	2
+chr1	108611	108612	570.6427215189874	2	-45	1146.0	chr1	108611	108612	573.0	-45	1	chr1	108611	108612	573.0	-45	2
+chr1	106047	106048	569.646835443038	2	7	1144.0	chr1	106047	106048	572.0	7	1	chr1	106047	106048	572.0	7	2
+chr1	481	482	682.2006329113924	2	-7	1371.0	chr1	500	501	569.0	-44	1	chr1	463	464	802.0	30	2
+chr1	59228	59229	562.6756329113924	2	56	1130.0	chr1	59228	59229	565.0	56	1	chr1	59228	59229	565.0	56	2
+chr1	173276	173277	543.7537974683544	2	8	1092.0	chr1	173276	173277	546.0	8	1	chr1	173276	173277	546.0	8	2
+chr1	434	435	431.5107594936709	2	43	866.0	chr1	452	453	504.0	8	1	chr1	417	418	362.0	78	2
+chr1	45670	45671	490.971835443038	2	-35	986.0	chr1	45670	45671	493.0	-35	1	chr1	45670	45671	493.0	-35	2
+chr1	31670	31671	487.9841772151899	2	66	980.0	chr1	31670	31671	490.0	66	1	chr1	31670	31671	490.0	66	2
+chr1	32483	32484	476.0335443037975	2	48	956.0	chr1	32483	32484	478.0	48	1	chr1	32483	32484	478.0	48	2
+chr1	6714	6715	431.218670886076	2	-4	866.0	chr1	6714	6715	433.0	-4	1	chr1	6714	6715	433.0	-4	2
+chr1	136369	136370	363.498417721519	2	-14	730.0	chr1	136369	136370	365.0	-14	1	chr1	136369	136370	365.0	-14	2
+chr1	39237	39238	360.5107594936709	2	61	724.0	chr1	39237	39238	362.0	61	1	chr1	39237	39238	362.0	61	2
+chr1	668	669	317.6876582278481	2	-48	638.0	chr1	668	669	319.0	-48	1	chr1	668	669	319.0	-48	2
+chr1	98551	98552	121.49810126582278	2	27	244.0	chr1	98551	98552	122.0	27	1	chr1	98551	98552	122.0	27	2
+chr1	180331	180332	96.60094936708862	2	82	194.0	chr1	180331	180332	97.0	82	1	chr1	180331	180332	97.0	82	2
+chr1	230172	230173	42.87658227848101	2	-2	86.0	chr1	230178	230179	56.0	10	1	chr1	230166	230167	30.0	-13	2
+chr1	230133	230134	26.95886075949367	2	-8	54.0	chr1	230125	230126	44.0	10	1	chr1	230142	230143	10.0	-25	2
+chr1	230154	230155	44.69145569620253	2	34	90.0	chr1	230157	230158	15.0	5	1	chr1	230151	230152	75.0	63	2
+chr1	96919	96920	11.950632911392404	2	3	24.0	chr1	96919	96920	12.0	3	1	chr1	96919	96920	12.0	3	2
+chr1	197535	197536	4.9794303797468356	2	73	10.0	chr1	197535	197536	5.0	73	1	chr1	197535	197536	5.0	73	2
Binary file test-data/histogram_out1.pdf has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/key_out1.tabular	Tue Nov 17 14:26:08 2015 -0500
@@ -0,0 +1,3 @@
+data	median read count
+1	783.5
+2	790.0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/largest_s_input1.gff	Tue Nov 17 14:26:08 2015 -0500
@@ -0,0 +1,64 @@
+chr1	cwpair	.	59	60	2881.0	.	.	cw_distance=2
+chr1	cwpair	.	123	124	4204.0	.	.	cw_distance=52
+chr1	cwpair	.	156	157	2177.0	.	.	cw_distance=59
+chr1	cwpair	.	245	246	5421.0	.	.	cw_distance=-40
+chr1	cwpair	.	250	251	2739.0	.	.	cw_distance=78
+chr1	cwpair	.	256	257	2424.0	.	.	cw_distance=30
+chr1	cwpair	.	345	346	1185.0	.	.	cw_distance=-25
+chr1	cwpair	.	350	351	885.0	.	.	cw_distance=66
+chr1	cwpair	.	417	418	362.0	.	.	cw_distance=78
+chr1	cwpair	.	463	464	802.0	.	.	cw_distance=30
+chr1	cwpair	.	668	669	319.0	.	.	cw_distance=-48
+chr1	cwpair	.	6218	6219	2125.0	.	.	cw_distance=91
+chr1	cwpair	.	6454	6455	1249.0	.	.	cw_distance=63
+chr1	cwpair	.	6714	6715	433.0	.	.	cw_distance=-4
+chr1	cwpair	.	19213	19214	778.0	.	.	cw_distance=-25
+chr1	cwpair	.	22580	22581	863.0	.	.	cw_distance=-2
+chr1	cwpair	.	25305	25306	1183.0	.	.	cw_distance=99
+chr1	cwpair	.	31670	31671	490.0	.	.	cw_distance=66
+chr1	cwpair	.	32483	32484	478.0	.	.	cw_distance=48
+chr1	cwpair	.	39076	39077	1350.0	.	.	cw_distance=-29
+chr1	cwpair	.	39237	39238	362.0	.	.	cw_distance=61
+chr1	cwpair	.	45670	45671	493.0	.	.	cw_distance=-35
+chr1	cwpair	.	55548	55549	956.0	.	.	cw_distance=86
+chr1	cwpair	.	59228	59229	565.0	.	.	cw_distance=56
+chr1	cwpair	.	65160	65161	618.0	.	.	cw_distance=-4
+chr1	cwpair	.	70792	70793	2146.0	.	.	cw_distance=12
+chr1	cwpair	.	72786	72787	1063.0	.	.	cw_distance=-10
+chr1	cwpair	.	87011	87012	2370.0	.	.	cw_distance=95
+chr1	cwpair	.	87015	87016	834.0	.	.	cw_distance=-28
+chr1	cwpair	.	87109	87110	2259.0	.	.	cw_distance=3
+chr1	cwpair	.	87174	87175	3475.0	.	.	cw_distance=-13
+chr1	cwpair	.	87182	87183	5699.0	.	.	cw_distance=51
+chr1	cwpair	.	92421	92422	1388.0	.	.	cw_distance=0
+chr1	cwpair	.	92601	92602	2836.0	.	.	cw_distance=96
+chr1	cwpair	.	92657	92658	447.0	.	.	cw_distance=33
+chr1	cwpair	.	95955	95956	689.0	.	.	cw_distance=51
+chr1	cwpair	.	96919	96920	12.0	.	.	cw_distance=3
+chr1	cwpair	.	98551	98552	122.0	.	.	cw_distance=27
+chr1	cwpair	.	101399	101400	2361.0	.	.	cw_distance=-44
+chr1	cwpair	.	106047	106048	572.0	.	.	cw_distance=7
+chr1	cwpair	.	108611	108612	573.0	.	.	cw_distance=-45
+chr1	cwpair	.	113782	113783	716.0	.	.	cw_distance=-20
+chr1	cwpair	.	116649	116650	773.0	.	.	cw_distance=-41
+chr1	cwpair	.	124306	124307	761.0	.	.	cw_distance=-43
+chr1	cwpair	.	134230	134231	659.0	.	.	cw_distance=100
+chr1	cwpair	.	136369	136370	365.0	.	.	cw_distance=-14
+chr1	cwpair	.	138876	138877	711.0	.	.	cw_distance=-4
+chr1	cwpair	.	139230	139231	1179.0	.	.	cw_distance=15
+chr1	cwpair	.	151365	151366	595.0	.	.	cw_distance=-28
+chr1	cwpair	.	155079	155080	1573.0	.	.	cw_distance=83
+chr1	cwpair	.	169095	169096	1887.0	.	.	cw_distance=-43
+chr1	cwpair	.	170134	170135	657.0	.	.	cw_distance=10
+chr1	cwpair	.	173276	173277	546.0	.	.	cw_distance=8
+chr1	cwpair	.	180331	180332	97.0	.	.	cw_distance=82
+chr1	cwpair	.	185109	185110	1371.0	.	.	cw_distance=46
+chr1	cwpair	.	197535	197536	5.0	.	.	cw_distance=73
+chr1	cwpair	.	199413	199414	810.0	.	.	cw_distance=-30
+chr1	cwpair	.	203863	203864	1476.0	.	.	cw_distance=-37
+chr1	cwpair	.	228672	228673	626.0	.	.	cw_distance=58
+chr1	cwpair	.	229759	229760	4531.0	.	.	cw_distance=16
+chr1	cwpair	.	229762	229763	699.0	.	.	cw_distance=63
+chr1	cwpair	.	230142	230143	10.0	.	.	cw_distance=-25
+chr1	cwpair	.	230151	230152	75.0	.	.	cw_distance=63
+chr1	cwpair	.	230166	230167	30.0	.	.	cw_distance=-13
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/orphan_out1.tabular	Tue Nov 17 14:26:08 2015 -0500
@@ -0,0 +1,3 @@
+chrom	midpoint	midpoint+1	c-w sum	c-w distance	replicate id
+chr1	388	389	359.0	20	1
+chr1	72731	72732	710.0	100	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/summary_out1.gff	Tue Nov 17 14:26:08 2015 -0500
@@ -0,0 +1,64 @@
+chr1	repmatch	.	87168	87169	4488.704113924051	.	.	median_distance=-1;value_sum=9006.0;replicates=2
+chr1	repmatch	.	229759	229760	4512.3598101265825	.	.	median_distance=16;value_sum=9062.0;replicates=2
+chr1	repmatch	.	123	124	4186.70506329114	.	.	median_distance=52;value_sum=8408.0;replicates=2
+chr1	repmatch	.	262	263	3246.0278481012656	.	.	median_distance=18;value_sum=6512.0;replicates=2
+chr1	repmatch	.	231	232	4699.198417721519	.	.	median_distance=-13;value_sum=9443.0;replicates=2
+chr1	repmatch	.	87188	87189	4647.554746835443	.	.	median_distance=39;value_sum=9342.0;replicates=2
+chr1	repmatch	.	59	60	2869.1477848101267	.	.	median_distance=2;value_sum=5762.0;replicates=2
+chr1	repmatch	.	257	258	2595.2319620253165	.	.	median_distance=63;value_sum=5213.0;replicates=2
+chr1	repmatch	.	92651	92652	1420.1610759493672	.	.	median_distance=20;value_sum=2844.0;replicates=2
+chr1	repmatch	.	101399	101400	2351.2870253164556	.	.	median_distance=-44;value_sum=4722.0;replicates=2
+chr1	repmatch	.	87109	87110	2249.7066455696204	.	.	median_distance=3;value_sum=4518.0;replicates=2
+chr1	repmatch	.	156	157	2168.043987341772	.	.	median_distance=59;value_sum=4354.0;replicates=2
+chr1	repmatch	.	70792	70793	2137.171518987342	.	.	median_distance=12;value_sum=4292.0;replicates=2
+chr1	repmatch	.	6218	6219	2116.257911392405	.	.	median_distance=91;value_sum=4250.0;replicates=2
+chr1	repmatch	.	86996	86997	2181.75	.	.	median_distance=66;value_sum=4383.0;replicates=2
+chr1	repmatch	.	169095	169096	1879.2370253164559	.	.	median_distance=-43;value_sum=3774.0;replicates=2
+chr1	repmatch	.	155079	155080	1566.5287974683545	.	.	median_distance=83;value_sum=3146.0;replicates=2
+chr1	repmatch	.	203863	203864	1469.9278481012657	.	.	median_distance=-37;value_sum=2952.0;replicates=2
+chr1	repmatch	.	92421	92422	1382.2898734177215	.	.	median_distance=0;value_sum=2776.0;replicates=2
+chr1	repmatch	.	185109	185110	1365.3598101265823	.	.	median_distance=46;value_sum=2742.0;replicates=2
+chr1	repmatch	.	39076	39077	1344.4462025316457	.	.	median_distance=-29;value_sum=2700.0;replicates=2
+chr1	repmatch	.	6454	6455	1243.8617088607593	.	.	median_distance=63;value_sum=2498.0;replicates=2
+chr1	repmatch	.	87029	87030	1009.0689873417721	.	.	median_distance=1;value_sum=2025.0;replicates=2
+chr1	repmatch	.	25305	25306	1178.1332278481013	.	.	median_distance=99;value_sum=2366.0;replicates=2
+chr1	repmatch	.	139230	139231	1174.1496835443038	.	.	median_distance=15;value_sum=2358.0;replicates=2
+chr1	repmatch	.	335	336	1173.125	.	.	median_distance=-5;value_sum=2356.0;replicates=2
+chr1	repmatch	.	55548	55549	952.067088607595	.	.	median_distance=86;value_sum=1912.0;replicates=2
+chr1	repmatch	.	360	361	888.3591772151899	.	.	median_distance=45;value_sum=1784.0;replicates=2
+chr1	repmatch	.	72795	72796	961.6268987341772	.	.	median_distance=9;value_sum=1932.0;replicates=2
+chr1	repmatch	.	22580	22581	859.4496835443038	.	.	median_distance=-2;value_sum=1726.0;replicates=2
+chr1	repmatch	.	199413	199414	806.6677215189873	.	.	median_distance=-30;value_sum=1620.0;replicates=2
+chr1	repmatch	.	92584	92585	1800.832911392405	.	.	median_distance=62;value_sum=3625.0;replicates=2
+chr1	repmatch	.	19213	19214	774.7993670886076	.	.	median_distance=-25;value_sum=1556.0;replicates=2
+chr1	repmatch	.	116649	116650	769.8199367088607	.	.	median_distance=-41;value_sum=1546.0;replicates=2
+chr1	repmatch	.	124306	124307	757.8693037974683	.	.	median_distance=-43;value_sum=1522.0;replicates=2
+chr1	repmatch	.	113782	113783	713.0544303797469	.	.	median_distance=-20;value_sum=1432.0;replicates=2
+chr1	repmatch	.	138876	138877	708.075	.	.	median_distance=-4;value_sum=1422.0;replicates=2
+chr1	repmatch	.	229762	229763	696.1243670886076	.	.	median_distance=63;value_sum=1398.0;replicates=2
+chr1	repmatch	.	95955	95956	686.1655063291139	.	.	median_distance=51;value_sum=1378.0;replicates=2
+chr1	repmatch	.	134230	134231	656.2889240506329	.	.	median_distance=100;value_sum=1318.0;replicates=2
+chr1	repmatch	.	170134	170135	654.2971518987342	.	.	median_distance=10;value_sum=1314.0;replicates=2
+chr1	repmatch	.	228672	228673	623.4246835443038	.	.	median_distance=58;value_sum=1252.0;replicates=2
+chr1	repmatch	.	65160	65161	615.4575949367088	.	.	median_distance=-4;value_sum=1236.0;replicates=2
+chr1	repmatch	.	151365	151366	592.5522151898734	.	.	median_distance=-28;value_sum=1190.0;replicates=2
+chr1	repmatch	.	108611	108612	570.6427215189874	.	.	median_distance=-45;value_sum=1146.0;replicates=2
+chr1	repmatch	.	106047	106048	569.646835443038	.	.	median_distance=7;value_sum=1144.0;replicates=2
+chr1	repmatch	.	481	482	682.2006329113924	.	.	median_distance=-7;value_sum=1371.0;replicates=2
+chr1	repmatch	.	59228	59229	562.6756329113924	.	.	median_distance=56;value_sum=1130.0;replicates=2
+chr1	repmatch	.	173276	173277	543.7537974683544	.	.	median_distance=8;value_sum=1092.0;replicates=2
+chr1	repmatch	.	434	435	431.5107594936709	.	.	median_distance=43;value_sum=866.0;replicates=2
+chr1	repmatch	.	45670	45671	490.971835443038	.	.	median_distance=-35;value_sum=986.0;replicates=2
+chr1	repmatch	.	31670	31671	487.9841772151899	.	.	median_distance=66;value_sum=980.0;replicates=2
+chr1	repmatch	.	32483	32484	476.0335443037975	.	.	median_distance=48;value_sum=956.0;replicates=2
+chr1	repmatch	.	6714	6715	431.218670886076	.	.	median_distance=-4;value_sum=866.0;replicates=2
+chr1	repmatch	.	136369	136370	363.498417721519	.	.	median_distance=-14;value_sum=730.0;replicates=2
+chr1	repmatch	.	39237	39238	360.5107594936709	.	.	median_distance=61;value_sum=724.0;replicates=2
+chr1	repmatch	.	668	669	317.6876582278481	.	.	median_distance=-48;value_sum=638.0;replicates=2
+chr1	repmatch	.	98551	98552	121.49810126582278	.	.	median_distance=27;value_sum=244.0;replicates=2
+chr1	repmatch	.	180331	180332	96.60094936708862	.	.	median_distance=82;value_sum=194.0;replicates=2
+chr1	repmatch	.	230172	230173	42.87658227848101	.	.	median_distance=-2;value_sum=86.0;replicates=2
+chr1	repmatch	.	230133	230134	26.95886075949367	.	.	median_distance=-8;value_sum=54.0;replicates=2
+chr1	repmatch	.	230154	230155	44.69145569620253	.	.	median_distance=34;value_sum=90.0;replicates=2
+chr1	repmatch	.	96919	96920	11.950632911392404	.	.	median_distance=3;value_sum=24.0;replicates=2
+chr1	repmatch	.	197535	197536	4.9794303797468356	.	.	median_distance=73;value_sum=10.0;replicates=2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Tue Nov 17 14:26:08 2015 -0500
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="anaconda" version="2.3.0">
+        <repository changeset_revision="1da77309352e" name="package_anaconda_2_3_0" owner="greg" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>