Mercurial > repos > bgruening > repmatch_gff3

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repmatch_gff3.py	Wed Dec 23 09:24:35 2015 -0500
@@ -0,0 +1,49 @@
+# repmatch_gff3.py
+#
+# Replicate matching - matches paired peaks from two or more replicates
+#
+# Input: two or more gff files (matched_peak output from cwpair2), each a list of paired peaks from a replicate
+#
+# Output: list of matched groups and list of unmatched peaks
+# Files: statistics_table.tabular (file to replicate ID), matched_paired_peaks.tabular, detail.tabular, unmatched_peaks.tabular
+
+import argparse
+import repmatch_gff3_util
+
+if __name__ == '__main__':
+    # Command line entry point: parse the options and hand them to
+    # repmatch_gff3_util.process_files().
+    parser = argparse.ArgumentParser()
+    # Every --input occurrence carries two values: the dataset path followed
+    # by its history id.  The option is required so that a missing input is
+    # reported by argparse instead of failing below with a TypeError when
+    # iterating over None.
+    parser.add_argument('--input', dest='inputs', action='append', nargs=2, required=True, help="Input datasets")
+    parser.add_argument('--method', dest='method', default='closest', help='Method of finding match')
+    parser.add_argument('--distance', dest='distance', type=int, default=50, help='Maximum distance between peaks in different replicates to allow merging')
+    parser.add_argument('--step', dest='step', type=int, default=0, help='Step size of distance for each iteration')
+    parser.add_argument('--replicates', dest='replicates', type=int, default=2, help='Minimum number of replicates that must be matched for merging to occur')
+    parser.add_argument('--low_limit', dest='low_limit', type=int, default=-1000, help='Lower limit for c-w distance filter')
+    parser.add_argument('--up_limit', dest='up_limit', type=int, default=1000, help='Upper limit for c-w distance filter')
+    parser.add_argument('--output_files', dest='output_files', default='all', help='Restrict output dataset collections.')
+    # The matched peaks dataset is always written, so its path is mandatory.
+    parser.add_argument('--output_matched_peaks', dest='output_matched_peaks', required=True, help='Matched groups in gff format')
+    parser.add_argument('--output_unmatched_peaks', dest='output_unmatched_peaks', default=None, help='Unmatched paired peaks in tabular format')
+    parser.add_argument('--output_detail', dest='output_detail', default=None, help='Details in tabular format')
+    parser.add_argument('--output_statistics_table', dest='output_statistics_table', default=None, help='Keys in tabular format')
+    parser.add_argument('--output_statistics_histogram', dest='output_statistics_histogram', default=None, help='Histogram')
+
+    args = parser.parse_args()
+
+    # Split the (path, hid) pairs into two parallel lists.
+    dataset_paths = [dataset_path for (dataset_path, hid) in args.inputs]
+    hids = [hid for (dataset_path, hid) in args.inputs]
+    # NOTE: process_files() takes up_limit before low_limit.
+    repmatch_gff3_util.process_files(dataset_paths,
+                                     hids,
+                                     args.method,
+                                     args.distance,
+                                     args.step,
+                                     args.replicates,
+                                     args.up_limit,
+                                     args.low_limit,
+                                     args.output_files,
+                                     args.output_matched_peaks,
+                                     args.output_unmatched_peaks,
+                                     args.output_detail,
+                                     args.output_statistics_table,
+                                     args.output_statistics_histogram)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repmatch_gff3.xml	Wed Dec 23 09:24:35 2015 -0500
@@ -0,0 +1,161 @@
+<?xml version="1.0"?>
+<tool id="repmatch_gff3" name="RepMatch" version="@WRAPPER_VERSION@.0">
+    <description>Match paired peaks from two or more replicates</description>
+    <macros>
+        <import>repmatch_gff3_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command>
+        python $__tool_directory__/repmatch_gff3.py
+        #for $i in $input:
+             --input "${i}" "${i.hid}"
+        #end for
+        --method $method
+        --distance $distance
+        --replicates $replicates
+        --output_files $output_files_cond.output_files
+        --output_matched_peaks "$output_matched_peaks"
+        #if str($output_files_cond.output_files) in ["all", "matched_peaks_unmatched_peaks"]:
+            --output_unmatched_peaks "$output_unmatched_peaks"
+        #end if
+        #if str($output_files_cond.output_files) =="all":
+            --output_detail "$output_detail"
+            --output_statistics_table "$output_statistics_table"
+            --output_statistics_histogram "$output_statistics_histogram"
+        #end if
+        #if str($advanced_options_cond.advanced_options) == "on":
+            --step $advanced_options_cond.step
+            --low_limit $advanced_options_cond.low_limit
+            --up_limit $advanced_options_cond.up_limit
+        #end if
+    </command>
+    <inputs>
+        <param  name="input" type="data" format="gff" multiple="True" min="2" label="Match paired peaks on" />
+        <param name="method" type="select" label="Method of finding match">
+            <option value="closest" selected="True">Closest</option>
+            <option value="largest">Largest</option>
+            <option value="all">All</option>
+        </param>
+        <param name="distance" type="integer" value="50" min="0" label="Maximum distance between peaks in different replicates to allow merging" />
+        <param name="replicates" type="integer" value="2" min="2" label="Minimum number of replicates that must be matched for merging to occur" />
+        <conditional name="output_files_cond">
+            <param name="output_files" type="select" label="Select output" help="Statistics will always be generated." >
+                <option value="all" selected="True">everything</option>
+                <option value="matched_peaks">matched paired peaks only</option>
+                <option value="matched_peaks_unmatched_peaks">matched paired peaks and unmatched paired peaks only</option>
+            </param>
+            <when value="matched_peaks" />
+            <when value="matched_peaks_unmatched_peaks" />
+            <when value="all" />
+        </conditional>
+        <conditional name="advanced_options_cond">
+            <param name="advanced_options" type="select" label="Advanced options">
+                <option value="off" selected="true">Hide advanced options</option>
+                <option value="on">Display advanced options</option>
+            </param>
+            <when value="on">
+                <param name="step" type="integer" value="0" min="0" label="Step size" help="Distance for each iteration" />
+                <param name="low_limit" type="integer" value="-1000" label="Lower limit for Crick-Watson distance filter" />
+                <param name="up_limit" type="integer" value="1000" label="Upper limit for Crick-Watson distance filter" />
+            </when>
+            <when value="off" />
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output_statistics_table" format="tabular" label="Statistics Table: ${tool.name} on ${on_string}">
+            <filter>output_files_cond["output_files"] == "all"</filter>
+       </data>
+        <data name="output_statistics_histogram" format="pdf" label="Statistics Histogram: ${tool.name} on ${on_string}">
+            <filter>output_files_cond["output_files"] == "all"</filter>
+       </data>
+        <data name="output_detail" format="tabular" label="Data D: ${tool.name} on ${on_string}">
+            <filter>output_files_cond["output_files"] == "all"</filter>
+       </data>
+        <data name="output_unmatched_peaks" format="tabular" label="Data UP: ${tool.name} on ${on_string}">
+            <filter>output_files_cond["output_files"] in ["all", "matched_peaks_unmatched_peaks"]</filter>
+        </data>
+        <data name="output_matched_peaks" format="gff" label="Data MP: ${tool.name} on ${on_string}" />
+    </outputs>
+    <tests>
+        <!-- Every test case must be wrapped in its own <test> element. -->
+        <test>
+            <!-- A multiple="True" data parameter takes its datasets as one comma-separated value. -->
+            <param name="input" value="closest_matched_pairs_input1.gff,largest_matched_pairs_input1.gff" ftype="gff" />
+            <param name="method" value="closest" />
+            <param name="distance" value="50" />
+            <param name="replicates" value="2" />
+            <param name="output_files" value="all" />
+            <!-- step, low_limit and up_limit only exist when the advanced options are switched on. -->
+            <param name="advanced_options" value="on" />
+            <param name="step" value="0" />
+            <param name="low_limit" value="-1000" />
+            <param name="up_limit" value="1000" />
+            <output name="output_statistics_table" file="statistics_table_out1.tabular" ftype="tabular" />
+            <output name="output_statistics_histogram" file="statistics_histogram_out1.pdf" ftype="pdf" compare="sim_size" />
+            <output name="output_detail" file="detail_out1.tabular" ftype="tabular" />
+            <output name="output_unmatched_peaks" file="unmatched_peaks_out1.tabular" ftype="tabular" />
+            <output name="output_matched_peaks" file="matched_peaks_out1.gff" ftype="gff" />
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+RepMatch accepts two or more input datasets, and starts by defining peak-pair midpoints in the first dataset.  It then
+discovers all peak-pair midpoints in the second dataset that are within the distance, defined by the tool's **Maximum
+distance between peaks in different replicates to allow merging** parameter, from the peak-pair midpoint coordinate in
+the first dataset.  When encountering multiple candidates to match (one-to-many), RepMatch uses the method defined by
+the tool's **Method of finding match** parameter so that there is at most only a one-to-one match across the two datasets.
+This method provides the following options:
+
+ * **closest** - matches only the closest one in bp distance.
+ * **largest** -  matches the one that contains the largest number of reads.
+ * **all** -  both methods are run separately.
+
+RepMatch matching is an iterative process, as it attempts to find the centroid coordinate amongst all replicates. As such,
+the centroid is the point of reference for "distance" and "closest".  This process can be sped up by increasing the tool's
+**Step size** parameter.
+
+The minimum number of replicates that can be matched for a match to occur is defined by the tool's **Minimum number of
+replicates that must be matched for merging to occur** parameter.  Additional filters can be applied using the tool's
+**Advanced options**, including a lower and upper limit for the C-W distance.
+
+.. image:: $PATH_TO_IMAGES/repmatch.png
+
+-----
+
+**Options**
+
+ * **Distance** - Maximum distance for discovering all peak-pair midpoints in a second dataset relative to the peak-pair midpoints in the first dataset
+ * **Method** - Method to use when encountering multiple candidates to match so that there is at most only a one-to-one match across the two datasets.
+ * **Step Size** - Distance for each iteration.
+ * **Replicates** - Minimum number of replicates that can be matched for a match to occur.  This value must be at least 2.
+ * **Lower Limit** - Lower limit for the Crick-Watson distance filter.
+ * **Upper Limit** - Upper limit for the Crick-Watson distance filter.
+
+-----
+
+**Output Data Files**
+
+ * **Data MP** - gff file consisting of only peak pairs
+
+  - Columns are **chr**, **script**, **blank**, **peak start**, **peak end**, **blank**, **normalized tag counts**, **blank** and **info**.
+  - Peak start and end are separated by one coordinate.
+  - Normalized tag is the occupancy averaged across replicates.
+  - Attributes include C-W distance, sum total of tag counts, number of replicates merged.
+
+ * **Data D** - tabular file consisting of the list of all matched replicates.
+ * **Data UP** - tabular file consisting of all unmatched peak-pairs.
+
+**Output Statistics Files**
+
+ * **Statistics Table** - tabular file providing the description key of **Data D**.
+ * **Statistics Histogram** - graph of the number of matched locations having the indicated replicate counts.
+
+**Comments on Replicates**
+
+Three types of replicates may be considered.  Biological replicates represent independently collected biological samples.
+At least two biological replicates must be performed for each experiment from which a conclusion is being drawn, and the
+conclusion must be evident in both biological replicates when analyzed separately.  Technical replicates represent a re-run
+of the assay on the same biological material.  This is usually done when one replicate fails to produce quality data, and is
+used to replace that earlier replicate.  Sequencing replicates represent additional sequencing of the same successful library
+in order to obtain more reads should the analysis require it.  The reads from individual sequencing replicates are usually
+merged without need for separate analysis.
+
+    </help>
+    <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repmatch_gff3_macros.xml	Wed Dec 23 09:24:35 2015 -0500
@@ -0,0 +1,29 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<macros>
+    <token name="@WRAPPER_VERSION@">1.0</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="2.3.0">anaconda</requirement>
+        </requirements>
+    </xml>
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:"/>
+            <exit_code range=":-1"/>
+            <regex match="Error:"/>
+            <regex match="Exception:"/>
+        </stdio>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+                @unpublished{None,
+                author = {None},
+                title = {None},
+                year = {None},
+                eprint = {None},
+                url = {http://www.huck.psu.edu/content/research/independent-centers-excellence/center-for-eukaryotic-gene-regulation}
+            }</citation>
+        </citations>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repmatch_gff3_util.py	Wed Dec 23 09:24:35 2015 -0500
@@ -0,0 +1,462 @@
+import bisect
+import csv
+import os
+import shutil
+import sys
+import tempfile
+import matplotlib
+matplotlib.use('Agg')
+from matplotlib import pyplot
+
+# Graph settings
+# Axis labels used by frequency_histogram().
+Y_LABEL = 'Counts'
+X_LABEL = 'Number of matched replicates'
+# Marker edge width applied to every x and y tick line of the histogram.
+TICK_WIDTH = 3
+# Amount to shift the graph to make labels fit, [left, right, top, bottom]
+ADJUST = [0.180, 0.9, 0.9, 0.1]
+# Length of tick marks, use TICK_WIDTH for width
+pyplot.rc('xtick.major', size=10.00)
+pyplot.rc('ytick.major', size=10.00)
+# Matplotlib rc settings for line width, axes line width and font.  These
+# calls run at import time, so they affect every figure drawn afterwards.
+pyplot.rc('lines', linewidth=4.00)
+pyplot.rc('axes', linewidth=3.00)
+pyplot.rc('font', family='Bitstream Vera Sans', size=32.0)
+
+# Bar colors: frequency_histogram() uses COLORS[i] for the i-th series.
+COLORS = 'krb'
+
+
+class Replicate(object):
+    """
+    The paired peaks of one input dataset, grouped by chromosome name.
+
+    Attributes set here are id, dataset_path and chromosomes (a dict that maps
+    a chromosome name to its Chromosome object).
+    """
+
+    def __init__(self, id, dataset_path):
+        """
+        Read the tab-delimited gff file at dataset_path.
+
+        id is stored unchanged and is later used as the key of this replicate
+        inside peak groups and as the replicate id written to the outputs.
+        """
+        self.id = id
+        self.dataset_path = dataset_path
+        # Close the dataset as soon as it has been parsed instead of leaving
+        # the handle open until the interpreter collects it.
+        with open(dataset_path, 'rt') as fh:
+            self.parse(csv.reader(fh, delimiter='\t'))
+
+    def parse(self, reader):
+        """
+        Build self.chromosomes from an iterable of 9-column gff rows.
+
+        Column 4 is used as the peak midpoint, column 6 as the peak value and
+        the cw_distance attribute of column 9 as the peak distance.  A row
+        with a different number of columns, a non-numeric field or a missing
+        cw_distance attribute raises an exception.
+        """
+        self.chromosomes = {}
+        for line in reader:
+            # The csv reader yields an empty list for a blank line; skip it
+            # together with comment lines and quoted header lines.
+            if not line or line[0].startswith(('#', '"')):
+                continue
+            cname, junk, junk, mid, midplus, value, strand, junk, attrs = line
+            attrs = parse_gff_attrs(attrs)
+            distance = attrs['cw_distance']
+            mid = int(mid)
+            # The end coordinate is validated but not used any further.
+            midplus = int(midplus)
+            value = float(value)
+            distance = int(distance)
+            if cname not in self.chromosomes:
+                self.chromosomes[cname] = Chromosome(cname)
+            chrom = self.chromosomes[cname]
+            chrom.add_peak(Peak(cname, mid, value, distance, self))
+        # Peaks are appended in file order; sort each chromosome once at the
+        # end so that its bisect keys are valid.
+        for chrom in self.chromosomes.values():
+            chrom.sort_by_index()
+
+    def filter(self, up_limit, low_limit):
+        """Apply Chromosome.filter(up_limit, low_limit) to every chromosome."""
+        for chrom in self.chromosomes.values():
+            chrom.filter(up_limit, low_limit)
+
+    def size(self):
+        """Return the number of peaks currently held over all chromosomes."""
+        return sum([len(c.peaks) for c in self.chromosomes.values()])
+
+
+class Chromosome(object):
+    """
+    The peaks of one chromosome within one replicate.
+
+    peaks holds the Peak objects.  keys holds their midpoints in the same
+    order and is (re)built by sort_by_index() and filter(); it is the list
+    that bisect searches run against.
+    """
+
+    def __init__(self, name):
+        self.name = name
+        self.peaks = []
+
+    def add_peak(self, peak):
+        """Append peak.  Call sort_by_index() afterwards to refresh keys."""
+        self.peaks.append(peak)
+
+    def sort_by_index(self):
+        """Sort the peaks by midpoint and rebuild the parallel keys list."""
+        self.peaks.sort(key=lambda peak: peak.midpoint)
+        self.keys = make_keys(self.peaks)
+
+    def remove_peak(self, peak):
+        """
+        Remove exactly the given peak object, if this chromosome holds it.
+
+        Nothing happens when the peak is not present.
+        """
+        i = bisect.bisect_left(self.keys, peak.midpoint)
+        # Several peaks may share one midpoint.  Walk the whole run of equal
+        # midpoints and delete the requested object, not simply the first
+        # peak that happens to sit at that coordinate.
+        while i < len(self.peaks) and self.peaks[i].midpoint == peak.midpoint:
+            if self.peaks[i] is peak:
+                del self.keys[i]
+                del self.peaks[i]
+                return
+            i += 1
+
+    def filter(self, up_limit, low_limit):
+        """Keep only the peaks with low_limit <= distance <= up_limit."""
+        self.peaks = [p for p in self.peaks if low_limit <= p.distance <= up_limit]
+        self.keys = make_keys(self.peaks)
+
+
+class Peak(object):
+    """
+    A single paired peak: chromosome name, midpoint, value, distance and the
+    replicate object it belongs to.
+    """
+
+    def __init__(self, chrom, midpoint, value, distance, replicate):
+        self.replicate = replicate
+        self.distance = distance
+        self.midpoint = midpoint
+        self.value = value
+        self.chrom = chrom
+
+    def normalized_value(self, med):
+        """Return value * med / replicate.median."""
+        scaled = self.value * med
+        return scaled / self.replicate.median
+
+
+class PeakGroup(object):
+    """
+    A set of matched peaks, at most one per replicate.
+
+    peaks maps a replicate id to the Peak taken from that replicate.
+    """
+
+    def __init__(self):
+        self.peaks = {}
+
+    def add_peak(self, repid, peak):
+        """Store peak as the member for replicate repid (replacing any)."""
+        self.peaks[repid] = peak
+
+    @property
+    def chrom(self):
+        """Chromosome name of the first stored peak."""
+        # dict.values() is only indexable on Python 2; go through a list so
+        # the lookup works on either major version.
+        return list(self.peaks.values())[0].chrom
+
+    @property
+    def midpoint(self):
+        """Median of the member midpoints."""
+        return median([peak.midpoint for peak in self.peaks.values()])
+
+    @property
+    def num_replicates(self):
+        """Number of replicates that contributed a peak."""
+        return len(self.peaks)
+
+    @property
+    def median_distance(self):
+        """Median of the member distances."""
+        return median([peak.distance for peak in self.peaks.values()])
+
+    @property
+    def value_sum(self):
+        """Sum of the member values."""
+        return sum([peak.value for peak in self.peaks.values()])
+
+    def normalized_value(self, med):
+        """Median of the members' Peak.normalized_value(med)."""
+        values = []
+        for peak in self.peaks.values():
+            values.append(peak.normalized_value(med))
+        return median(values)
+
+    @property
+    def peakpeak_distance(self):
+        """
+        Absolute midpoint difference between the first two stored peaks.
+
+        Raises IndexError when the group holds fewer than two peaks.
+        """
+        members = list(self.peaks.values())
+        return abs(members[0].midpoint - members[1].midpoint)
+
+
+class FrequencyDistribution(object):
+    """Counts how often each value has been added."""
+
+    def __init__(self, d=None):
+        # A falsy d (None or an empty mapping) is replaced by a fresh dict.
+        self.dist = d if d else {}
+
+    def add(self, x):
+        """Increase the count recorded for x by one."""
+        if x in self.dist:
+            self.dist[x] += 1
+        else:
+            self.dist[x] = 1
+
+    def graph_series(self):
+        """Return two parallel lists: the values and their counts."""
+        x = list(self.dist.keys())
+        y = list(self.dist.values())
+        return x, y
+
+    def mode(self):
+        """Return the value with the highest count."""
+        return max(self.dist, key=self.dist.get)
+
+    def size(self):
+        """Return the total number of values added."""
+        total = 0
+        for count in self.dist.values():
+            total += count
+        return total
+
+
+def stop_err(msg):
+    """Write msg to stderr and terminate with exit status 1."""
+    sys.stderr.write(msg)
+    raise SystemExit(1)
+
+
+def median(data):
+    """
+    Return the median of data, or 0 when data is empty.
+
+    For an even number of items the two middle items are added and the sum
+    is divided by 2 with the "/" operator.
+    """
+    if not data:
+        return 0
+    ordered = sorted(data)
+    middle = len(data) // 2
+    if len(data) % 2:
+        return ordered[middle]
+    return (ordered[middle] + ordered[middle - 1]) / 2
+
+
+def make_keys(peaks):
+    """Return the midpoints of peaks as a list, in the same order."""
+    keys = []
+    for peak in peaks:
+        keys.append(peak.midpoint)
+    return keys
+
+
+def get_window(chromosome, target_peaks, distance):
+    """
+    Returns a window of all peaks from a replicate within a certain distance of
+    a peak from another replicate.
+
+    chromosome supplies the sorted keys list, the parallel peaks list and the
+    name.  target_peaks may be any iterable of peaks; the window spans from
+    the smallest target midpoint minus distance to the largest target
+    midpoint plus distance, both ends included.
+
+    Returns a tuple (list of peaks inside the window, chromosome.name).  With
+    no target peaks the window is empty.
+    """
+    # Materialise first: callers pass dict.values(), which can neither be
+    # indexed nor iterated twice safely on every Python version.
+    midpoints = [peak.midpoint for peak in target_peaks]
+    if not midpoints:
+        return ([], chromosome.name)
+    # The first midpoint itself always stays inside the window, which only
+    # matters for a negative distance.
+    lower = min(midpoints[0], min(midpoints) - distance)
+    upper = max(midpoints[0], max(midpoints) + distance)
+    start_index = bisect.bisect_left(chromosome.keys, lower)
+    end_index = bisect.bisect_right(chromosome.keys, upper)
+    return (chromosome.peaks[start_index: end_index], chromosome.name)
+
+
+def match_largest(window, peak, chrum):
+    """
+    Return the peak with the highest value in window, or None when window is
+    empty or peak.chrom differs from chrum.
+    """
+    if window and peak.chrom == chrum:
+        return max(window, key=lambda candidate: candidate.value)
+    return None
+
+
+def match_closest(window, peak, chrum):
+    """
+    Return the peak in window whose midpoint is nearest to peak.midpoint, or
+    None when window is empty or peak.chrom differs from chrum.
+    """
+    if window and peak.chrom == chrum:
+        return min(window, key=lambda candidate: abs(candidate.midpoint - peak.midpoint))
+    return None
+
+
+def frequency_histogram(freqs, dataset_path, labels=None, title=''):
+    """
+    Draw the frequency distributions in freqs as grouped bars and save the
+    figure to dataset_path.
+
+    The x axis runs from the highest to the lowest replicate count.  Each
+    distribution gets its own bar colour out of COLORS (colours are reused
+    when there are more distributions than colours).
+
+    labels and title are accepted for backward compatibility but not used.
+    """
+    figure = pyplot.figure(figsize=(10, 10))
+    try:
+        series = [freq.graph_series() for freq in freqs]
+        all_x = [x for xvals, yvals in series for x in xvals]
+        if all_x:
+            # One tick range for every series, not only for the last one.
+            low = min(all_x)
+            high = max(all_x)
+            bar_width = 0.8 / len(series)
+            for i, (xvals, yvals) in enumerate(series):
+                # Go from high to low: value x is drawn at low + high - x so
+                # that every bar sits above the tick carrying its own label,
+                # whatever order or gaps the values have.
+                positions = [low + high - x - 0.4 + bar_width * i for x in xvals]
+                pyplot.bar(positions, yvals, width=bar_width, color=COLORS[i % len(COLORS)])
+            ticks = list(range(low, high + 1))
+            pyplot.xticks(ticks, [str(low + high - tick) for tick in ticks])
+        pyplot.xlabel(X_LABEL)
+        pyplot.ylabel(Y_LABEL)
+        pyplot.subplots_adjust(left=ADJUST[0], right=ADJUST[1], top=ADJUST[2], bottom=ADJUST[3])
+        ax = pyplot.gca()
+        for l in ax.get_xticklines() + ax.get_yticklines():
+            l.set_markeredgewidth(TICK_WIDTH)
+        pyplot.savefig(dataset_path)
+    finally:
+        # Release the figure; pyplot keeps every figure alive until closed.
+        pyplot.close(figure)
+
+
+# Maps a match method name to the function that picks one peak out of a
+# window of candidate peaks.
+METHODS = {'closest': match_closest, 'largest': match_largest}
+
+
+def gff_attrs(d):
+    """
+    Render the mapping d as "key=value" pairs joined by ";".
+
+    A falsy d (None or empty) is rendered as ".".
+    """
+    if d:
+        pairs = ['%s=%s' % (key, val) for key, val in d.items()]
+        return ';'.join(pairs)
+    return '.'
+
+
+def parse_gff_attrs(s):
+    """
+    Parse a gff attribute column ("key=value;key=value") into a dict of
+    strings.
+
+    "." yields an empty dict.  Empty items, such as the one produced by a
+    trailing ";", are skipped, and only the first "=" of an item separates
+    key from value, so values may contain "=" themselves.
+
+    Raises ValueError for a non-empty item without any "=".
+    """
+    d = {}
+    if s == '.':
+        return d
+    for item in s.split(';'):
+        item = item.strip()
+        if not item:
+            continue
+        key, val = item.split('=', 1)
+        d[key] = val
+    return d
+
+
+def gff_row(cname, start, end, score, source, type='.', strand='.', phase='.', attrs=None):
+    """
+    Return the nine gff columns as a tuple in file order: cname, source,
+    type, start, end, score, strand, phase and the rendered attributes.
+    """
+    # attrs defaults to None rather than a shared dict; gff_attrs() renders
+    # None and an empty dict alike as ".".
+    return (cname, source, type, start, end, score, strand, phase, gff_attrs(attrs))
+
+
+def get_temporary_plot_path():
+    """
+    Create an empty temporary file with a .pdf extension and return its
+    path, so the plot can be saved in pdf format.
+
+    The caller is responsible for moving or deleting the file.
+    """
+    # Create the file directly in the default temporary directory.  A private
+    # sub-directory per call would be left behind once the file is moved.
+    fd, name = tempfile.mkstemp(prefix='tmp-repmatch-', suffix='.pdf')
+    os.close(fd)
+    return name
+
+
+def process_files(dataset_paths, galaxy_hids, method, distance, step, replicates, up_limit, low_limit, output_files,
+                  output_matched_peaks, output_unmatched_peaks, output_detail, output_statistics_table, output_statistics_histogram):
+    """
+    Run perform_process() once per requested match method.
+
+    method is either a key of METHODS or 'all', which runs every method in
+    METHODS.  Nothing is done when fewer than two datasets are given.  When
+    both output_files and method are 'all' and a histogram path was supplied,
+    a combined histogram with one series per method replaces the histogram
+    written by the individual runs.
+
+    NOTE(review): every run writes to the same output paths, so with method
+    'all' the matched, unmatched, detail and statistics table files hold the
+    results of the last method only.
+    """
+    output_statistics_histogram_file = (output_files == "all" and
+                                        method == "all" and
+                                        output_statistics_histogram is not None)
+    if len(dataset_paths) < 2:
+        return
+    if method == 'all':
+        match_methods = list(METHODS.keys())
+    else:
+        match_methods = [method]
+    # Keep the result of every run; the combined histogram needs all of them,
+    # not just the one of the last method.
+    statistics = []
+    for match_method in match_methods:
+        statistics.append(perform_process(dataset_paths,
+                                          galaxy_hids,
+                                          match_method,
+                                          distance,
+                                          step,
+                                          replicates,
+                                          up_limit,
+                                          low_limit,
+                                          output_files,
+                                          output_matched_peaks,
+                                          output_unmatched_peaks,
+                                          output_detail,
+                                          output_statistics_table,
+                                          output_statistics_histogram))
+    if output_statistics_histogram_file:
+        tmp_statistics_histogram_path = get_temporary_plot_path()
+        frequency_histogram([stat['distribution'] for stat in statistics],
+                            tmp_statistics_histogram_path,
+                            match_methods)
+        shutil.move(tmp_statistics_histogram_path, output_statistics_histogram)
+
+
+def perform_process(dataset_paths, galaxy_hids, method, distance, step, num_required, up_limit, low_limit, output_files,
+                    output_matched_peaks, output_unmatched_peaks, output_detail, output_statistics_table, output_statistics_histogram):
+    """
+    Match the peaks of all replicates with one method and write the outputs.
+
+    dataset_paths and galaxy_hids are parallel lists; each pair becomes one
+    Replicate.  method must be a key of METHODS.  Peaks are merged into a
+    group when at least num_required replicates contribute a peak within
+    distance; a step greater than zero repeats the matching with a growing
+    distance.  up_limit and low_limit filter the peaks by their distance
+    attribute before matching.
+
+    The matched peaks file is always written.  The unmatched peaks, detail,
+    statistics table and histogram files are written only when output_files
+    selects them and the corresponding path is not None.
+
+    Returns {'distribution': FrequencyDistribution} holding the number of
+    replicates per group, with every unmatched peak counted as 1.
+
+    Terminates through stop_err() when a dataset cannot be parsed or the
+    method is unknown.
+    """
+    output_detail_file = output_files in ["all"] and output_detail is not None
+    output_statistics_table_file = output_files in ["all"] and output_statistics_table is not None
+    output_unmatched_peaks_file = output_files in ["all", "matched_peaks_unmatched_peaks"] and output_unmatched_peaks is not None
+    output_statistics_histogram_file = output_files in ["all"] and output_statistics_histogram is not None
+    # Fail loudly on an unknown method instead of silently matching nothing.
+    if method not in METHODS:
+        stop_err('Unknown match method "%s"' % method)
+    find_match = METHODS[method]
+    replicates = []
+    for i, dataset_path in enumerate(dataset_paths):
+        try:
+            galaxy_hid = galaxy_hids[i]
+            r = Replicate(galaxy_hid, dataset_path)
+            replicates.append(r)
+        except Exception as e:
+            stop_err('Unable to parse file "%s", exception: %s' % (dataset_path, str(e)))
+
+    # Header of the detail file: the group columns followed by one set of
+    # peak columns per replicate.
+    labels = ('chrom',
+              'median midpoint',
+              'median midpoint+1',
+              'median normalized reads',
+              'replicates',
+              'median c-w distance',
+              'reads sum')
+    for replicate in replicates:
+        labels += ('chrom',
+                   'median midpoint',
+                   'median midpoint+1',
+                   'c-w sum',
+                   'c-w distance',
+                   'replicate id')
+    # Perform filtering
+    if up_limit < 1000 or low_limit > -1000:
+        for replicate in replicates:
+            replicate.filter(up_limit, low_limit)
+    # Actually merge the peaks
+    peak_groups = []
+    unmatched_peaks = []
+    freq = FrequencyDistribution()
+
+    def search_for_matches(group, chromosome_name, reference, candidates, distance):
+        # Add to group at most one peak from every candidate replicate that is
+        # not part of it yet.  reference is the peak handed to the match
+        # method as point of reference.  Multiple passes are used, expanding
+        # the window to be +- distance from any previously matched peak.
+        while True:
+            new_match = False
+            for replicate in candidates:
+                if replicate.id in group.peaks:
+                    # Stop if match already found for this replicate
+                    continue
+                if chromosome_name not in replicate.chromosomes:
+                    continue
+                window, chrum = get_window(replicate.chromosomes[chromosome_name],
+                                           list(group.peaks.values()),
+                                           distance)
+                match = find_match(window, reference, chrum)
+                if match:
+                    group.add_peak(replicate.id, match)
+                    new_match = True
+            if not new_match:
+                break
+
+    def do_match(reps, distance):
+        # Copy list because we will mutate it, but keep replicate references.
+        reps = reps[:]
+        while len(reps) > 1:
+            # Iterate over each replicate as "main"
+            main = reps.pop(0)
+            for chromosome in main.chromosomes.values():
+                peaks_by_value = chromosome.peaks[:]
+                # Sort main replicate by value
+                peaks_by_value.sort(key=lambda peak: -peak.value)
+                # Attempt to enlarge existing peak groups.  Only groups on
+                # this chromosome qualify, and the group centroid is passed
+                # explicitly as the point of reference rather than relying on
+                # whichever peak a previous loop happened to leave behind.
+                for group in peak_groups:
+                    if group.chrom != chromosome.name:
+                        continue
+                    old_peaks = list(group.peaks.values())
+                    centroid = Peak(chromosome.name, group.midpoint, 0, 0, None)
+                    search_for_matches(group, chromosome.name, centroid, reps, distance)
+                    for added in list(group.peaks.values()):
+                        if added not in old_peaks:
+                            added.replicate.chromosomes[chromosome.name].remove_peak(added)
+                # Attempt to find new peaks groups.  For each peak in the
+                # main replicate, search for matches in the other replicates
+                for peak in peaks_by_value:
+                    matches = PeakGroup()
+                    matches.add_peak(main.id, peak)
+                    search_for_matches(matches, chromosome.name, peak, reps, distance)
+                    # Were enough replicates matched?
+                    if matches.num_replicates >= num_required:
+                        for matched in list(matches.peaks.values()):
+                            matched.replicate.chromosomes[chromosome.name].remove_peak(matched)
+                        peak_groups.append(matches)
+
+    # Zero or less = no stepping
+    if step <= 0:
+        do_match(replicates, distance)
+    else:
+        # NOTE(review): range() stops before distance, so the maximum
+        # distance itself is never tried unless it is covered by an earlier
+        # step - confirm whether a final pass at distance is intended.
+        for d in range(0, distance, step):
+            do_match(replicates, d)
+    for group in peak_groups:
+        freq.add(group.num_replicates)
+    # Collect together the remaining unmatched_peaks
+    for replicate in replicates:
+        for chromosome in replicate.chromosomes.values():
+            for peak in chromosome.peaks:
+                freq.add(1)
+                unmatched_peaks.append(peak)
+    # Median value over all matched peaks, and per replicate; both are needed
+    # to normalize the values written below.
+    med = median([peak.value for group in peak_groups for peak in group.peaks.values()])
+    for replicate in replicates:
+        replicate.median = median([peak.value for group in peak_groups for peak in group.peaks.values() if peak.replicate == replicate])
+
+    # Every file opened for output is remembered so that it is flushed and
+    # closed even when writing fails half way.
+    open_files = []
+
+    def td_writer(file_path):
+        # Returns a tab-delimited writer for a certain output
+        handle = open(file_path, 'wt')
+        open_files.append(handle)
+        return csv.writer(handle, delimiter='\t')
+
+    try:
+        matched_peaks_output = td_writer(output_matched_peaks)
+        # The statistics table only exists when it was requested; writing to
+        # it unconditionally fails for every other output selection.
+        if output_statistics_table_file:
+            statistics_table_output = td_writer(output_statistics_table)
+            statistics_table_output.writerow(('data', 'median read count'))
+            for replicate in replicates:
+                statistics_table_output.writerow((replicate.id, replicate.median))
+        if output_detail_file:
+            detail_output = td_writer(output_detail)
+            detail_output.writerow(labels)
+        if output_unmatched_peaks_file:
+            unmatched_peaks_output = td_writer(output_unmatched_peaks)
+            unmatched_peaks_output.writerow(('chrom', 'midpoint', 'midpoint+1', 'c-w sum', 'c-w distance', 'replicate id'))
+        for group in peak_groups:
+            # Output matched_peaks (matched pairs).
+            matched_peaks_output.writerow(gff_row(cname=group.chrom,
+                                                  start=group.midpoint,
+                                                  end=group.midpoint+1,
+                                                  source='repmatch',
+                                                  score=group.normalized_value(med),
+                                                  attrs={'median_distance': group.median_distance,
+                                                         'replicates': group.num_replicates,
+                                                         'value_sum': group.value_sum}))
+            if output_detail_file:
+                matched_peaks = (group.chrom,
+                                 group.midpoint,
+                                 group.midpoint+1,
+                                 group.normalized_value(med),
+                                 group.num_replicates,
+                                 group.median_distance,
+                                 group.value_sum)
+                for peak in group.peaks.values():
+                    matched_peaks += (peak.chrom, peak.midpoint, peak.midpoint+1, peak.value, peak.distance, peak.replicate.id)
+                detail_output.writerow(matched_peaks)
+        if output_unmatched_peaks_file:
+            for unmatched_peak in unmatched_peaks:
+                unmatched_peaks_output.writerow((unmatched_peak.chrom,
+                                                 unmatched_peak.midpoint,
+                                                 unmatched_peak.midpoint+1,
+                                                 unmatched_peak.value,
+                                                 unmatched_peak.distance,
+                                                 unmatched_peak.replicate.id))
+    finally:
+        for handle in open_files:
+            handle.close()
+    if output_statistics_histogram_file:
+        tmp_statistics_histogram_path = get_temporary_plot_path()
+        frequency_histogram([freq], tmp_statistics_histogram_path)
+        shutil.move(tmp_statistics_histogram_path, output_statistics_histogram)
+    return {'distribution': freq}
Binary file static/images/repmatch.png has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/closest_matched_pairs_input1.gff	Wed Dec 23 09:24:35 2015 -0500
@@ -0,0 +1,66 @@
+chr1	cwpair	.	59	60	2881.0	.	.	cw_distance=2
+chr1	cwpair	.	123	124	4204.0	.	.	cw_distance=52
+chr1	cwpair	.	156	157	2177.0	.	.	cw_distance=59
+chr1	cwpair	.	218	219	4022.0	.	.	cw_distance=14
+chr1	cwpair	.	265	266	2474.0	.	.	cw_distance=48
+chr1	cwpair	.	268	269	4088.0	.	.	cw_distance=6
+chr1	cwpair	.	325	326	1171.0	.	.	cw_distance=16
+chr1	cwpair	.	370	371	899.0	.	.	cw_distance=25
+chr1	cwpair	.	388	389	359.0	.	.	cw_distance=20
+chr1	cwpair	.	452	453	504.0	.	.	cw_distance=8
+chr1	cwpair	.	500	501	569.0	.	.	cw_distance=-44
+chr1	cwpair	.	668	669	319.0	.	.	cw_distance=-48
+chr1	cwpair	.	6218	6219	2125.0	.	.	cw_distance=91
+chr1	cwpair	.	6454	6455	1249.0	.	.	cw_distance=63
+chr1	cwpair	.	6714	6715	433.0	.	.	cw_distance=-4
+chr1	cwpair	.	19213	19214	778.0	.	.	cw_distance=-25
+chr1	cwpair	.	22580	22581	863.0	.	.	cw_distance=-2
+chr1	cwpair	.	25305	25306	1183.0	.	.	cw_distance=99
+chr1	cwpair	.	31670	31671	490.0	.	.	cw_distance=66
+chr1	cwpair	.	32483	32484	478.0	.	.	cw_distance=48
+chr1	cwpair	.	39076	39077	1350.0	.	.	cw_distance=-29
+chr1	cwpair	.	39237	39238	362.0	.	.	cw_distance=61
+chr1	cwpair	.	45670	45671	493.0	.	.	cw_distance=-35
+chr1	cwpair	.	55548	55549	956.0	.	.	cw_distance=86
+chr1	cwpair	.	59228	59229	565.0	.	.	cw_distance=56
+chr1	cwpair	.	65160	65161	618.0	.	.	cw_distance=-4
+chr1	cwpair	.	70792	70793	2146.0	.	.	cw_distance=12
+chr1	cwpair	.	72731	72732	710.0	.	.	cw_distance=100
+chr1	cwpair	.	72805	72806	869.0	.	.	cw_distance=29
+chr1	cwpair	.	86982	86983	2013.0	.	.	cw_distance=37
+chr1	cwpair	.	87044	87045	1191.0	.	.	cw_distance=30
+chr1	cwpair	.	87109	87110	2259.0	.	.	cw_distance=3
+chr1	cwpair	.	87162	87163	5531.0	.	.	cw_distance=11
+chr1	cwpair	.	87194	87195	3643.0	.	.	cw_distance=27
+chr1	cwpair	.	92421	92422	1388.0	.	.	cw_distance=0
+chr1	cwpair	.	92567	92568	789.0	.	.	cw_distance=28
+chr1	cwpair	.	92645	92646	2397.0	.	.	cw_distance=8
+chr1	cwpair	.	95955	95956	689.0	.	.	cw_distance=51
+chr1	cwpair	.	96919	96920	12.0	.	.	cw_distance=3
+chr1	cwpair	.	98551	98552	122.0	.	.	cw_distance=27
+chr1	cwpair	.	101399	101400	2361.0	.	.	cw_distance=-44
+chr1	cwpair	.	106047	106048	572.0	.	.	cw_distance=7
+chr1	cwpair	.	108611	108612	573.0	.	.	cw_distance=-45
+chr1	cwpair	.	113782	113783	716.0	.	.	cw_distance=-20
+chr1	cwpair	.	116649	116650	773.0	.	.	cw_distance=-41
+chr1	cwpair	.	124306	124307	761.0	.	.	cw_distance=-43
+chr1	cwpair	.	134230	134231	659.0	.	.	cw_distance=100
+chr1	cwpair	.	136369	136370	365.0	.	.	cw_distance=-14
+chr1	cwpair	.	138876	138877	711.0	.	.	cw_distance=-4
+chr1	cwpair	.	139230	139231	1179.0	.	.	cw_distance=15
+chr1	cwpair	.	151365	151366	595.0	.	.	cw_distance=-28
+chr1	cwpair	.	155079	155080	1573.0	.	.	cw_distance=83
+chr1	cwpair	.	169095	169096	1887.0	.	.	cw_distance=-43
+chr1	cwpair	.	170134	170135	657.0	.	.	cw_distance=10
+chr1	cwpair	.	173276	173277	546.0	.	.	cw_distance=8
+chr1	cwpair	.	180331	180332	97.0	.	.	cw_distance=82
+chr1	cwpair	.	185109	185110	1371.0	.	.	cw_distance=46
+chr1	cwpair	.	197535	197536	5.0	.	.	cw_distance=73
+chr1	cwpair	.	199413	199414	810.0	.	.	cw_distance=-30
+chr1	cwpair	.	203863	203864	1476.0	.	.	cw_distance=-37
+chr1	cwpair	.	228672	228673	626.0	.	.	cw_distance=58
+chr1	cwpair	.	229759	229760	4531.0	.	.	cw_distance=16
+chr1	cwpair	.	229762	229763	699.0	.	.	cw_distance=63
+chr1	cwpair	.	230125	230126	44.0	.	.	cw_distance=10
+chr1	cwpair	.	230157	230158	15.0	.	.	cw_distance=5
+chr1	cwpair	.	230178	230179	56.0	.	.	cw_distance=10
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/detail_out1.tabular	Wed Dec 23 09:24:35 2015 -0500
@@ -0,0 +1,65 @@
+chrom	median midpoint	median midpoint+1	median normalized reads	replicates	median c-w distance	reads sum	chrom	median midpoint	median midpoint+1	c-w sum	c-w distance	replicate id	chrom	median midpoint	median midpoint+1	c-w sum	c-w distance	replicate id
+chr1	87168	87169	4488.704113924051	2	-1	9006.0	chr1	87162	87163	5531.0	11	1	chr1	87174	87175	3475.0	-13	2
+chr1	229759	229760	4512.3598101265825	2	16	9062.0	chr1	229759	229760	4531.0	16	1	chr1	229759	229760	4531.0	16	2
+chr1	123	124	4186.70506329114	2	52	8408.0	chr1	123	124	4204.0	52	1	chr1	123	124	4204.0	52	2
+chr1	262	263	3246.0278481012656	2	18	6512.0	chr1	268	269	4088.0	6	1	chr1	256	257	2424.0	30	2
+chr1	231	232	4699.198417721519	2	-13	9443.0	chr1	218	219	4022.0	14	1	chr1	245	246	5421.0	-40	2
+chr1	87188	87189	4647.554746835443	2	39	9342.0	chr1	87194	87195	3643.0	27	1	chr1	87182	87183	5699.0	51	2
+chr1	59	60	2869.1477848101267	2	2	5762.0	chr1	59	60	2881.0	2	1	chr1	59	60	2881.0	2	2
+chr1	257	258	2595.2319620253165	2	63	5213.0	chr1	265	266	2474.0	48	1	chr1	250	251	2739.0	78	2
+chr1	92651	92652	1420.1610759493672	2	20	2844.0	chr1	92645	92646	2397.0	8	1	chr1	92657	92658	447.0	33	2
+chr1	101399	101400	2351.2870253164556	2	-44	4722.0	chr1	101399	101400	2361.0	-44	1	chr1	101399	101400	2361.0	-44	2
+chr1	87109	87110	2249.7066455696204	2	3	4518.0	chr1	87109	87110	2259.0	3	1	chr1	87109	87110	2259.0	3	2
+chr1	156	157	2168.043987341772	2	59	4354.0	chr1	156	157	2177.0	59	1	chr1	156	157	2177.0	59	2
+chr1	70792	70793	2137.171518987342	2	12	4292.0	chr1	70792	70793	2146.0	12	1	chr1	70792	70793	2146.0	12	2
+chr1	6218	6219	2116.257911392405	2	91	4250.0	chr1	6218	6219	2125.0	91	1	chr1	6218	6219	2125.0	91	2
+chr1	86996	86997	2181.75	2	66	4383.0	chr1	86982	86983	2013.0	37	1	chr1	87011	87012	2370.0	95	2
+chr1	169095	169096	1879.2370253164559	2	-43	3774.0	chr1	169095	169096	1887.0	-43	1	chr1	169095	169096	1887.0	-43	2
+chr1	155079	155080	1566.5287974683545	2	83	3146.0	chr1	155079	155080	1573.0	83	1	chr1	155079	155080	1573.0	83	2
+chr1	203863	203864	1469.9278481012657	2	-37	2952.0	chr1	203863	203864	1476.0	-37	1	chr1	203863	203864	1476.0	-37	2
+chr1	92421	92422	1382.2898734177215	2	0	2776.0	chr1	92421	92422	1388.0	0	1	chr1	92421	92422	1388.0	0	2
+chr1	185109	185110	1365.3598101265823	2	46	2742.0	chr1	185109	185110	1371.0	46	1	chr1	185109	185110	1371.0	46	2
+chr1	39076	39077	1344.4462025316457	2	-29	2700.0	chr1	39076	39077	1350.0	-29	1	chr1	39076	39077	1350.0	-29	2
+chr1	6454	6455	1243.8617088607593	2	63	2498.0	chr1	6454	6455	1249.0	63	1	chr1	6454	6455	1249.0	63	2
+chr1	87029	87030	1009.0689873417721	2	1	2025.0	chr1	87044	87045	1191.0	30	1	chr1	87015	87016	834.0	-28	2
+chr1	25305	25306	1178.1332278481013	2	99	2366.0	chr1	25305	25306	1183.0	99	1	chr1	25305	25306	1183.0	99	2
+chr1	139230	139231	1174.1496835443038	2	15	2358.0	chr1	139230	139231	1179.0	15	1	chr1	139230	139231	1179.0	15	2
+chr1	335	336	1173.125	2	-5	2356.0	chr1	325	326	1171.0	16	1	chr1	345	346	1185.0	-25	2
+chr1	55548	55549	952.067088607595	2	86	1912.0	chr1	55548	55549	956.0	86	1	chr1	55548	55549	956.0	86	2
+chr1	360	361	888.3591772151899	2	45	1784.0	chr1	370	371	899.0	25	1	chr1	350	351	885.0	66	2
+chr1	72795	72796	961.6268987341772	2	9	1932.0	chr1	72805	72806	869.0	29	1	chr1	72786	72787	1063.0	-10	2
+chr1	22580	22581	859.4496835443038	2	-2	1726.0	chr1	22580	22581	863.0	-2	1	chr1	22580	22581	863.0	-2	2
+chr1	199413	199414	806.6677215189873	2	-30	1620.0	chr1	199413	199414	810.0	-30	1	chr1	199413	199414	810.0	-30	2
+chr1	92584	92585	1800.832911392405	2	62	3625.0	chr1	92567	92568	789.0	28	1	chr1	92601	92602	2836.0	96	2
+chr1	19213	19214	774.7993670886076	2	-25	1556.0	chr1	19213	19214	778.0	-25	1	chr1	19213	19214	778.0	-25	2
+chr1	116649	116650	769.8199367088607	2	-41	1546.0	chr1	116649	116650	773.0	-41	1	chr1	116649	116650	773.0	-41	2
+chr1	124306	124307	757.8693037974683	2	-43	1522.0	chr1	124306	124307	761.0	-43	1	chr1	124306	124307	761.0	-43	2
+chr1	113782	113783	713.0544303797469	2	-20	1432.0	chr1	113782	113783	716.0	-20	1	chr1	113782	113783	716.0	-20	2
+chr1	138876	138877	708.075	2	-4	1422.0	chr1	138876	138877	711.0	-4	1	chr1	138876	138877	711.0	-4	2
+chr1	229762	229763	696.1243670886076	2	63	1398.0	chr1	229762	229763	699.0	63	1	chr1	229762	229763	699.0	63	2
+chr1	95955	95956	686.1655063291139	2	51	1378.0	chr1	95955	95956	689.0	51	1	chr1	95955	95956	689.0	51	2
+chr1	134230	134231	656.2889240506329	2	100	1318.0	chr1	134230	134231	659.0	100	1	chr1	134230	134231	659.0	100	2
+chr1	170134	170135	654.2971518987342	2	10	1314.0	chr1	170134	170135	657.0	10	1	chr1	170134	170135	657.0	10	2
+chr1	228672	228673	623.4246835443038	2	58	1252.0	chr1	228672	228673	626.0	58	1	chr1	228672	228673	626.0	58	2
+chr1	65160	65161	615.4575949367088	2	-4	1236.0	chr1	65160	65161	618.0	-4	1	chr1	65160	65161	618.0	-4	2
+chr1	151365	151366	592.5522151898734	2	-28	1190.0	chr1	151365	151366	595.0	-28	1	chr1	151365	151366	595.0	-28	2
+chr1	108611	108612	570.6427215189874	2	-45	1146.0	chr1	108611	108612	573.0	-45	1	chr1	108611	108612	573.0	-45	2
+chr1	106047	106048	569.646835443038	2	7	1144.0	chr1	106047	106048	572.0	7	1	chr1	106047	106048	572.0	7	2
+chr1	481	482	682.2006329113924	2	-7	1371.0	chr1	500	501	569.0	-44	1	chr1	463	464	802.0	30	2
+chr1	59228	59229	562.6756329113924	2	56	1130.0	chr1	59228	59229	565.0	56	1	chr1	59228	59229	565.0	56	2
+chr1	173276	173277	543.7537974683544	2	8	1092.0	chr1	173276	173277	546.0	8	1	chr1	173276	173277	546.0	8	2
+chr1	434	435	431.5107594936709	2	43	866.0	chr1	452	453	504.0	8	1	chr1	417	418	362.0	78	2
+chr1	45670	45671	490.971835443038	2	-35	986.0	chr1	45670	45671	493.0	-35	1	chr1	45670	45671	493.0	-35	2
+chr1	31670	31671	487.9841772151899	2	66	980.0	chr1	31670	31671	490.0	66	1	chr1	31670	31671	490.0	66	2
+chr1	32483	32484	476.0335443037975	2	48	956.0	chr1	32483	32484	478.0	48	1	chr1	32483	32484	478.0	48	2
+chr1	6714	6715	431.218670886076	2	-4	866.0	chr1	6714	6715	433.0	-4	1	chr1	6714	6715	433.0	-4	2
+chr1	136369	136370	363.498417721519	2	-14	730.0	chr1	136369	136370	365.0	-14	1	chr1	136369	136370	365.0	-14	2
+chr1	39237	39238	360.5107594936709	2	61	724.0	chr1	39237	39238	362.0	61	1	chr1	39237	39238	362.0	61	2
+chr1	668	669	317.6876582278481	2	-48	638.0	chr1	668	669	319.0	-48	1	chr1	668	669	319.0	-48	2
+chr1	98551	98552	121.49810126582278	2	27	244.0	chr1	98551	98552	122.0	27	1	chr1	98551	98552	122.0	27	2
+chr1	180331	180332	96.60094936708862	2	82	194.0	chr1	180331	180332	97.0	82	1	chr1	180331	180332	97.0	82	2
+chr1	230172	230173	42.87658227848101	2	-2	86.0	chr1	230178	230179	56.0	10	1	chr1	230166	230167	30.0	-13	2
+chr1	230133	230134	26.95886075949367	2	-8	54.0	chr1	230125	230126	44.0	10	1	chr1	230142	230143	10.0	-25	2
+chr1	230154	230155	44.69145569620253	2	34	90.0	chr1	230157	230158	15.0	5	1	chr1	230151	230152	75.0	63	2
+chr1	96919	96920	11.950632911392404	2	3	24.0	chr1	96919	96920	12.0	3	1	chr1	96919	96920	12.0	3	2
+chr1	197535	197536	4.9794303797468356	2	73	10.0	chr1	197535	197536	5.0	73	1	chr1	197535	197536	5.0	73	2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/largest_matched_pairs_input1.gff	Wed Dec 23 09:24:35 2015 -0500
@@ -0,0 +1,64 @@
+chr1	cwpair	.	59	60	2881.0	.	.	cw_distance=2
+chr1	cwpair	.	123	124	4204.0	.	.	cw_distance=52
+chr1	cwpair	.	156	157	2177.0	.	.	cw_distance=59
+chr1	cwpair	.	245	246	5421.0	.	.	cw_distance=-40
+chr1	cwpair	.	250	251	2739.0	.	.	cw_distance=78
+chr1	cwpair	.	256	257	2424.0	.	.	cw_distance=30
+chr1	cwpair	.	345	346	1185.0	.	.	cw_distance=-25
+chr1	cwpair	.	350	351	885.0	.	.	cw_distance=66
+chr1	cwpair	.	417	418	362.0	.	.	cw_distance=78
+chr1	cwpair	.	463	464	802.0	.	.	cw_distance=30
+chr1	cwpair	.	668	669	319.0	.	.	cw_distance=-48
+chr1	cwpair	.	6218	6219	2125.0	.	.	cw_distance=91
+chr1	cwpair	.	6454	6455	1249.0	.	.	cw_distance=63
+chr1	cwpair	.	6714	6715	433.0	.	.	cw_distance=-4
+chr1	cwpair	.	19213	19214	778.0	.	.	cw_distance=-25
+chr1	cwpair	.	22580	22581	863.0	.	.	cw_distance=-2
+chr1	cwpair	.	25305	25306	1183.0	.	.	cw_distance=99
+chr1	cwpair	.	31670	31671	490.0	.	.	cw_distance=66
+chr1	cwpair	.	32483	32484	478.0	.	.	cw_distance=48
+chr1	cwpair	.	39076	39077	1350.0	.	.	cw_distance=-29
+chr1	cwpair	.	39237	39238	362.0	.	.	cw_distance=61
+chr1	cwpair	.	45670	45671	493.0	.	.	cw_distance=-35
+chr1	cwpair	.	55548	55549	956.0	.	.	cw_distance=86
+chr1	cwpair	.	59228	59229	565.0	.	.	cw_distance=56
+chr1	cwpair	.	65160	65161	618.0	.	.	cw_distance=-4
+chr1	cwpair	.	70792	70793	2146.0	.	.	cw_distance=12
+chr1	cwpair	.	72786	72787	1063.0	.	.	cw_distance=-10
+chr1	cwpair	.	87011	87012	2370.0	.	.	cw_distance=95
+chr1	cwpair	.	87015	87016	834.0	.	.	cw_distance=-28
+chr1	cwpair	.	87109	87110	2259.0	.	.	cw_distance=3
+chr1	cwpair	.	87174	87175	3475.0	.	.	cw_distance=-13
+chr1	cwpair	.	87182	87183	5699.0	.	.	cw_distance=51
+chr1	cwpair	.	92421	92422	1388.0	.	.	cw_distance=0
+chr1	cwpair	.	92601	92602	2836.0	.	.	cw_distance=96
+chr1	cwpair	.	92657	92658	447.0	.	.	cw_distance=33
+chr1	cwpair	.	95955	95956	689.0	.	.	cw_distance=51
+chr1	cwpair	.	96919	96920	12.0	.	.	cw_distance=3
+chr1	cwpair	.	98551	98552	122.0	.	.	cw_distance=27
+chr1	cwpair	.	101399	101400	2361.0	.	.	cw_distance=-44
+chr1	cwpair	.	106047	106048	572.0	.	.	cw_distance=7
+chr1	cwpair	.	108611	108612	573.0	.	.	cw_distance=-45
+chr1	cwpair	.	113782	113783	716.0	.	.	cw_distance=-20
+chr1	cwpair	.	116649	116650	773.0	.	.	cw_distance=-41
+chr1	cwpair	.	124306	124307	761.0	.	.	cw_distance=-43
+chr1	cwpair	.	134230	134231	659.0	.	.	cw_distance=100
+chr1	cwpair	.	136369	136370	365.0	.	.	cw_distance=-14
+chr1	cwpair	.	138876	138877	711.0	.	.	cw_distance=-4
+chr1	cwpair	.	139230	139231	1179.0	.	.	cw_distance=15
+chr1	cwpair	.	151365	151366	595.0	.	.	cw_distance=-28
+chr1	cwpair	.	155079	155080	1573.0	.	.	cw_distance=83
+chr1	cwpair	.	169095	169096	1887.0	.	.	cw_distance=-43
+chr1	cwpair	.	170134	170135	657.0	.	.	cw_distance=10
+chr1	cwpair	.	173276	173277	546.0	.	.	cw_distance=8
+chr1	cwpair	.	180331	180332	97.0	.	.	cw_distance=82
+chr1	cwpair	.	185109	185110	1371.0	.	.	cw_distance=46
+chr1	cwpair	.	197535	197536	5.0	.	.	cw_distance=73
+chr1	cwpair	.	199413	199414	810.0	.	.	cw_distance=-30
+chr1	cwpair	.	203863	203864	1476.0	.	.	cw_distance=-37
+chr1	cwpair	.	228672	228673	626.0	.	.	cw_distance=58
+chr1	cwpair	.	229759	229760	4531.0	.	.	cw_distance=16
+chr1	cwpair	.	229762	229763	699.0	.	.	cw_distance=63
+chr1	cwpair	.	230142	230143	10.0	.	.	cw_distance=-25
+chr1	cwpair	.	230151	230152	75.0	.	.	cw_distance=63
+chr1	cwpair	.	230166	230167	30.0	.	.	cw_distance=-13
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/matched_peaks_out1.gff	Wed Dec 23 09:24:35 2015 -0500
@@ -0,0 +1,64 @@
+chr1	repmatch	.	87168	87169	4488.704113924051	.	.	median_distance=-1;value_sum=9006.0;replicates=2
+chr1	repmatch	.	229759	229760	4512.3598101265825	.	.	median_distance=16;value_sum=9062.0;replicates=2
+chr1	repmatch	.	123	124	4186.70506329114	.	.	median_distance=52;value_sum=8408.0;replicates=2
+chr1	repmatch	.	262	263	3246.0278481012656	.	.	median_distance=18;value_sum=6512.0;replicates=2
+chr1	repmatch	.	231	232	4699.198417721519	.	.	median_distance=-13;value_sum=9443.0;replicates=2
+chr1	repmatch	.	87188	87189	4647.554746835443	.	.	median_distance=39;value_sum=9342.0;replicates=2
+chr1	repmatch	.	59	60	2869.1477848101267	.	.	median_distance=2;value_sum=5762.0;replicates=2
+chr1	repmatch	.	257	258	2595.2319620253165	.	.	median_distance=63;value_sum=5213.0;replicates=2
+chr1	repmatch	.	92651	92652	1420.1610759493672	.	.	median_distance=20;value_sum=2844.0;replicates=2
+chr1	repmatch	.	101399	101400	2351.2870253164556	.	.	median_distance=-44;value_sum=4722.0;replicates=2
+chr1	repmatch	.	87109	87110	2249.7066455696204	.	.	median_distance=3;value_sum=4518.0;replicates=2
+chr1	repmatch	.	156	157	2168.043987341772	.	.	median_distance=59;value_sum=4354.0;replicates=2
+chr1	repmatch	.	70792	70793	2137.171518987342	.	.	median_distance=12;value_sum=4292.0;replicates=2
+chr1	repmatch	.	6218	6219	2116.257911392405	.	.	median_distance=91;value_sum=4250.0;replicates=2
+chr1	repmatch	.	86996	86997	2181.75	.	.	median_distance=66;value_sum=4383.0;replicates=2
+chr1	repmatch	.	169095	169096	1879.2370253164559	.	.	median_distance=-43;value_sum=3774.0;replicates=2
+chr1	repmatch	.	155079	155080	1566.5287974683545	.	.	median_distance=83;value_sum=3146.0;replicates=2
+chr1	repmatch	.	203863	203864	1469.9278481012657	.	.	median_distance=-37;value_sum=2952.0;replicates=2
+chr1	repmatch	.	92421	92422	1382.2898734177215	.	.	median_distance=0;value_sum=2776.0;replicates=2
+chr1	repmatch	.	185109	185110	1365.3598101265823	.	.	median_distance=46;value_sum=2742.0;replicates=2
+chr1	repmatch	.	39076	39077	1344.4462025316457	.	.	median_distance=-29;value_sum=2700.0;replicates=2
+chr1	repmatch	.	6454	6455	1243.8617088607593	.	.	median_distance=63;value_sum=2498.0;replicates=2
+chr1	repmatch	.	87029	87030	1009.0689873417721	.	.	median_distance=1;value_sum=2025.0;replicates=2
+chr1	repmatch	.	25305	25306	1178.1332278481013	.	.	median_distance=99;value_sum=2366.0;replicates=2
+chr1	repmatch	.	139230	139231	1174.1496835443038	.	.	median_distance=15;value_sum=2358.0;replicates=2
+chr1	repmatch	.	335	336	1173.125	.	.	median_distance=-5;value_sum=2356.0;replicates=2
+chr1	repmatch	.	55548	55549	952.067088607595	.	.	median_distance=86;value_sum=1912.0;replicates=2
+chr1	repmatch	.	360	361	888.3591772151899	.	.	median_distance=45;value_sum=1784.0;replicates=2
+chr1	repmatch	.	72795	72796	961.6268987341772	.	.	median_distance=9;value_sum=1932.0;replicates=2
+chr1	repmatch	.	22580	22581	859.4496835443038	.	.	median_distance=-2;value_sum=1726.0;replicates=2
+chr1	repmatch	.	199413	199414	806.6677215189873	.	.	median_distance=-30;value_sum=1620.0;replicates=2
+chr1	repmatch	.	92584	92585	1800.832911392405	.	.	median_distance=62;value_sum=3625.0;replicates=2
+chr1	repmatch	.	19213	19214	774.7993670886076	.	.	median_distance=-25;value_sum=1556.0;replicates=2
+chr1	repmatch	.	116649	116650	769.8199367088607	.	.	median_distance=-41;value_sum=1546.0;replicates=2
+chr1	repmatch	.	124306	124307	757.8693037974683	.	.	median_distance=-43;value_sum=1522.0;replicates=2
+chr1	repmatch	.	113782	113783	713.0544303797469	.	.	median_distance=-20;value_sum=1432.0;replicates=2
+chr1	repmatch	.	138876	138877	708.075	.	.	median_distance=-4;value_sum=1422.0;replicates=2
+chr1	repmatch	.	229762	229763	696.1243670886076	.	.	median_distance=63;value_sum=1398.0;replicates=2
+chr1	repmatch	.	95955	95956	686.1655063291139	.	.	median_distance=51;value_sum=1378.0;replicates=2
+chr1	repmatch	.	134230	134231	656.2889240506329	.	.	median_distance=100;value_sum=1318.0;replicates=2
+chr1	repmatch	.	170134	170135	654.2971518987342	.	.	median_distance=10;value_sum=1314.0;replicates=2
+chr1	repmatch	.	228672	228673	623.4246835443038	.	.	median_distance=58;value_sum=1252.0;replicates=2
+chr1	repmatch	.	65160	65161	615.4575949367088	.	.	median_distance=-4;value_sum=1236.0;replicates=2
+chr1	repmatch	.	151365	151366	592.5522151898734	.	.	median_distance=-28;value_sum=1190.0;replicates=2
+chr1	repmatch	.	108611	108612	570.6427215189874	.	.	median_distance=-45;value_sum=1146.0;replicates=2
+chr1	repmatch	.	106047	106048	569.646835443038	.	.	median_distance=7;value_sum=1144.0;replicates=2
+chr1	repmatch	.	481	482	682.2006329113924	.	.	median_distance=-7;value_sum=1371.0;replicates=2
+chr1	repmatch	.	59228	59229	562.6756329113924	.	.	median_distance=56;value_sum=1130.0;replicates=2
+chr1	repmatch	.	173276	173277	543.7537974683544	.	.	median_distance=8;value_sum=1092.0;replicates=2
+chr1	repmatch	.	434	435	431.5107594936709	.	.	median_distance=43;value_sum=866.0;replicates=2
+chr1	repmatch	.	45670	45671	490.971835443038	.	.	median_distance=-35;value_sum=986.0;replicates=2
+chr1	repmatch	.	31670	31671	487.9841772151899	.	.	median_distance=66;value_sum=980.0;replicates=2
+chr1	repmatch	.	32483	32484	476.0335443037975	.	.	median_distance=48;value_sum=956.0;replicates=2
+chr1	repmatch	.	6714	6715	431.218670886076	.	.	median_distance=-4;value_sum=866.0;replicates=2
+chr1	repmatch	.	136369	136370	363.498417721519	.	.	median_distance=-14;value_sum=730.0;replicates=2
+chr1	repmatch	.	39237	39238	360.5107594936709	.	.	median_distance=61;value_sum=724.0;replicates=2
+chr1	repmatch	.	668	669	317.6876582278481	.	.	median_distance=-48;value_sum=638.0;replicates=2
+chr1	repmatch	.	98551	98552	121.49810126582278	.	.	median_distance=27;value_sum=244.0;replicates=2
+chr1	repmatch	.	180331	180332	96.60094936708862	.	.	median_distance=82;value_sum=194.0;replicates=2
+chr1	repmatch	.	230172	230173	42.87658227848101	.	.	median_distance=-2;value_sum=86.0;replicates=2
+chr1	repmatch	.	230133	230134	26.95886075949367	.	.	median_distance=-8;value_sum=54.0;replicates=2
+chr1	repmatch	.	230154	230155	44.69145569620253	.	.	median_distance=34;value_sum=90.0;replicates=2
+chr1	repmatch	.	96919	96920	11.950632911392404	.	.	median_distance=3;value_sum=24.0;replicates=2
+chr1	repmatch	.	197535	197536	4.9794303797468356	.	.	median_distance=73;value_sum=10.0;replicates=2
Binary file test-data/statistics_histogram_out1.pdf has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/statistics_table_out1.tabular	Wed Dec 23 09:24:35 2015 -0500
@@ -0,0 +1,3 @@
+data	median read count
+1	783.5
+2	790.0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unmatched_peaks_out1.tabular	Wed Dec 23 09:24:35 2015 -0500
@@ -0,0 +1,3 @@
+chrom	midpoint	midpoint+1	c-w sum	c-w distance	replicate id
+chr1	388	389	359.0	20	1
+chr1	72731	72732	710.0	100	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Wed Dec 23 09:24:35 2015 -0500
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="anaconda" version="2.3.0">
+        <repository changeset_revision="d3f29b11da06" name="package_anaconda_2_3_0" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>