Mercurial > repos > rico > genome_diversity

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/LocationFile.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+
+import sys
+
+def die( message ):
+    print >> sys.stderr, message
+    sys.exit(1)
+
+def open_or_die( filename, mode='r', message=None ):
+    if message is None:
+        message = 'Error opening {0}'.format( filename )
+    try:
+        fh = open( filename, mode )
+    except IOError, err:
+        die( '{0}: {1}'.format( message, err.strerror ) )
+    return fh
+
+class LocationFile( object ):
+    def __init__( self, filename, comment_chars=None, delimiter='\t', key_column=0 ):
+        self.filename = filename
+        if comment_chars is None:
+            self.comment_chars = ( '#' )
+        else:
+            self.comment_chars = tuple( comment_chars )
+        self.delimiter = delimiter
+        self.key_column = key_column
+        self._map = {}
+        self._populate_map()
+
+    def _populate_map( self ):
+        try:
+            with open( self.filename ) as fh:
+                line_number = 0
+                for line in fh:
+                    line_number += 1
+                    line = line.rstrip( '\r\n' )
+                    if not line.startswith( self.comment_chars ):
+                        elems = line.split( self.delimiter )
+                        if len( elems ) <= self.key_column:
+                            die( 'Location file {0} line {1}: less than {2} columns'.format( self.filename, line_number, self.key_column + 1 ) )
+                        else:
+                            key = elems.pop( self.key_column )
+                            if key in self._map:
+                                if self._map[key] != elems:
+                                    die( 'Location file {0} line {1}: duplicate key "{2}"'.format( self.filename, line_number, key ) )
+                            else:
+                                self._map[key] = elems
+        except IOError, err:
+            die( 'Error opening location file {0}: {1}'.format( self.filename, err.strerror ) )
+
+    def get_values( self, key ):
+        if key in self._map:
+            rval = self._map[key]
+            if len( rval ) == 1:
+                return rval[0]
+            else:
+                return rval
+        else:
+            die( 'key "{0}" not found in location file {1}'.format( key, self.filename ) )
+
+    def get_values_if_exists( self, key ):
+        if key in self._map:
+            rval = self._map[key]
+            if len( rval ) == 1:
+                return rval[0]
+            else:
+                return rval
+        else:
+            return None
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/OrderedDict.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,259 @@
+# http://code.activestate.com/recipes/576693/
+# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy.
+# Passes Python2.7's test suite and incorporates all the latest updates.
+
+try:
+    from thread import get_ident as _get_ident
+except ImportError:
+    from dummy_thread import get_ident as _get_ident
+
+try:
+    from _abcoll import KeysView, ValuesView, ItemsView
+except ImportError:
+    pass
+
+
+class OrderedDict(dict):
+    'Dictionary that remembers insertion order'
+    # An inherited dict maps keys to values.
+    # The inherited dict provides __getitem__, __len__, __contains__, and get.
+    # The remaining methods are order-aware.
+    # Big-O running times for all methods are the same as for regular dictionaries.
+
+    # The internal self.__map dictionary maps keys to links in a doubly linked list.
+    # The circular doubly linked list starts and ends with a sentinel element.
+    # The sentinel element never gets deleted (this simplifies the algorithm).
+    # Each link is stored as a list of length three:  [PREV, NEXT, KEY].
+
+    def __init__(self, *args, **kwds):
+        '''Initialize an ordered dictionary.  Signature is the same as for
+        regular dictionaries, but keyword arguments are not recommended
+        because their insertion order is arbitrary.
+
+        '''
+        if len(args) > 1:
+            raise TypeError('expected at most 1 arguments, got %d' % len(args))
+        # The linked list is created only when self.__root does not exist yet,
+        # so calling __init__ again on a live instance keeps the existing order.
+        try:
+            self.__root
+        except AttributeError:
+            self.__root = root = []                     # sentinel node
+            root[:] = [root, root, None]
+            self.__map = {}
+        self.__update(*args, **kwds)
+
+    def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
+        'od.__setitem__(i, y) <==> od[i]=y'
+        # Setting a new item creates a new link which goes at the end of the linked
+        # list, and the inherited dictionary is updated with the new key/value pair.
+        if key not in self:
+            root = self.__root
+            last = root[0]
+            last[1] = root[0] = self.__map[key] = [last, root, key]
+        dict_setitem(self, key, value)
+
+    def __delitem__(self, key, dict_delitem=dict.__delitem__):
+        'od.__delitem__(y) <==> del od[y]'
+        # Deleting an existing item uses self.__map to find the link which is
+        # then removed by updating the links in the predecessor and successor nodes.
+        dict_delitem(self, key)
+        link_prev, link_next, key = self.__map.pop(key)
+        link_prev[1] = link_next
+        link_next[0] = link_prev
+
+    def __iter__(self):
+        'od.__iter__() <==> iter(od)'
+        # Walk the NEXT pointers from the sentinel until we are back at it.
+        root = self.__root
+        curr = root[1]
+        while curr is not root:
+            yield curr[2]
+            curr = curr[1]
+
+    def __reversed__(self):
+        'od.__reversed__() <==> reversed(od)'
+        # Same walk as __iter__, but following the PREV pointers.
+        root = self.__root
+        curr = root[0]
+        while curr is not root:
+            yield curr[2]
+            curr = curr[0]
+
+    def clear(self):
+        'od.clear() -> None.  Remove all items from od.'
+        try:
+            # Empty every link so the nodes no longer reference each other,
+            # then reset the sentinel to an empty circular list.
+            for node in self.__map.itervalues():
+                del node[:]
+            root = self.__root
+            root[:] = [root, root, None]
+            self.__map.clear()
+        except AttributeError:
+            # presumably reached when the link structures were never created
+            pass
+        dict.clear(self)
+
+    def popitem(self, last=True):
+        '''od.popitem() -> (k, v), return and remove a (key, value) pair.
+        Pairs are returned in LIFO order if last is true or FIFO order if false.
+
+        '''
+        if not self:
+            raise KeyError('dictionary is empty')
+        root = self.__root
+        if last:
+            # unlink the node just before the sentinel (most recently added)
+            link = root[0]
+            link_prev = link[0]
+            link_prev[1] = root
+            root[0] = link_prev
+        else:
+            # unlink the node just after the sentinel (oldest)
+            link = root[1]
+            link_next = link[1]
+            root[1] = link_next
+            link_next[0] = root
+        key = link[2]
+        del self.__map[key]
+        value = dict.pop(self, key)
+        return key, value
+
+    # -- the following methods do not depend on the internal structure --
+
+    def keys(self):
+        'od.keys() -> list of keys in od'
+        return list(self)
+
+    def values(self):
+        'od.values() -> list of values in od'
+        return [self[key] for key in self]
+
+    def items(self):
+        'od.items() -> list of (key, value) pairs in od'
+        return [(key, self[key]) for key in self]
+
+    def iterkeys(self):
+        'od.iterkeys() -> an iterator over the keys in od'
+        return iter(self)
+
+    def itervalues(self):
+        'od.itervalues -> an iterator over the values in od'
+        for k in self:
+            yield self[k]
+
+    def iteritems(self):
+        'od.iteritems -> an iterator over the (key, value) items in od'
+        for k in self:
+            yield (k, self[k])
+
+    # NOTE(review): update takes no explicit self; it is read from args[0],
+    # presumably so that 'self' remains usable as a keyword key -- confirm
+    # against the upstream recipe before changing the signature.
+    def update(*args, **kwds):
+        '''od.update(E, **F) -> None.  Update od from dict/iterable E and F.
+
+        If E is a dict instance, does:           for k in E: od[k] = E[k]
+        If E has a .keys() method, does:         for k in E.keys(): od[k] = E[k]
+        Or if E is an iterable of items, does:   for k, v in E: od[k] = v
+        In either case, this is followed by:     for k, v in F.items(): od[k] = v
+
+        '''
+        if len(args) > 2:
+            raise TypeError('update() takes at most 2 positional '
+                            'arguments (%d given)' % (len(args),))
+        elif not args:
+            raise TypeError('update() takes at least 1 argument (0 given)')
+        self = args[0]
+        # Make progressively weaker assumptions about "other"
+        other = ()
+        if len(args) == 2:
+            other = args[1]
+        if isinstance(other, dict):
+            for key in other:
+                self[key] = other[key]
+        elif hasattr(other, 'keys'):
+            for key in other.keys():
+                self[key] = other[key]
+        else:
+            for key, value in other:
+                self[key] = value
+        for key, value in kwds.items():
+            self[key] = value
+
+    __update = update  # let subclasses override update without breaking __init__
+
+    # unique sentinel so pop() can tell "no default given" apart from default=None
+    __marker = object()
+
+    def pop(self, key, default=__marker):
+        '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value.
+        If key is not found, d is returned if given, otherwise KeyError is raised.
+
+        '''
+        if key in self:
+            result = self[key]
+            del self[key]
+            return result
+        if default is self.__marker:
+            raise KeyError(key)
+        return default
+
+    def setdefault(self, key, default=None):
+        'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
+        if key in self:
+            return self[key]
+        self[key] = default
+        return default
+
+    # _repr_running is a deliberately shared mutable default: it records the
+    # (object id, thread id) pairs whose repr is in progress so that a
+    # self-referencing dictionary prints '...' instead of recursing forever.
+    def __repr__(self, _repr_running={}):
+        'od.__repr__() <==> repr(od)'
+        call_key = id(self), _get_ident()
+        if call_key in _repr_running:
+            return '...'
+        _repr_running[call_key] = 1
+        try:
+            if not self:
+                return '%s()' % (self.__class__.__name__,)
+            return '%s(%r)' % (self.__class__.__name__, self.items())
+        finally:
+            del _repr_running[call_key]
+
+    def __reduce__(self):
+        'Return state information for pickling'
+        items = [[k, self[k]] for k in self]
+        inst_dict = vars(self).copy()
+        # drop the attributes every fresh OrderedDict has (the link bookkeeping)
+        # so only extra instance attributes are pickled as state
+        for k in vars(OrderedDict()):
+            inst_dict.pop(k, None)
+        if inst_dict:
+            return (self.__class__, (items,), inst_dict)
+        return self.__class__, (items,)
+
+    def copy(self):
+        'od.copy() -> a shallow copy of od'
+        return self.__class__(self)
+
+    @classmethod
+    def fromkeys(cls, iterable, value=None):
+        '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
+        and values equal to v (which defaults to None).
+
+        '''
+        d = cls()
+        for key in iterable:
+            d[key] = value
+        return d
+
+    def __eq__(self, other):
+        '''od.__eq__(y) <==> od==y.  Comparison to another OD is order-sensitive
+        while comparison to a regular mapping is order-insensitive.
+
+        '''
+        if isinstance(other, OrderedDict):
+            return len(self)==len(other) and self.items() == other.items()
+        return dict.__eq__(self, other)
+
+    def __ne__(self, other):
+        return not self == other
+
+    # -- the following methods are only used in Python 2.7 --
+    # KeysView, ValuesView and ItemsView come from the optional _abcoll import
+    # at the top of the module; if that import failed, calling these methods
+    # raises NameError.
+
+    def viewkeys(self):
+        "od.viewkeys() -> a set-like object providing a view on od's keys"
+        return KeysView(self)
+
+    def viewvalues(self):
+        "od.viewvalues() -> an object providing a view on od's values"
+        return ValuesView(self)
+
+    def viewitems(self):
+        "od.viewitems() -> a set-like object providing a view on od's items"
+        return ItemsView(self)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/Population.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,185 @@
+#!/usr/bin/env python
+
+import OrderedDict
+import base64
+import json
+import zlib
+
+import sys
+
+class Individual(object):
+    __slots__ = ['_column', '_name', '_alias']
+
+    def __init__(self, column, name, alias=None):
+        self._column = int(column)
+        self._name = name
+        self._alias = alias
+
+    @property
+    def column(self):
+        return self._column
+
+    @property
+    def name(self):
+        return self._name if self._alias is None else self._alias
+
+    @property
+    def alias(self):
+        return self._alias
+
+    @alias.setter
+    def alias(self, alias):
+        self._alias = alias
+
+    @property
+    def real_name(self):
+        return self._name
+
+    def __eq__(self, other):
+        return self._column == other._column and self._name == other._name
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __repr__(self):
+        return 'Individual: column={0} name={1} alias={2}'.format(self._column, self._name, self._alias)
+
+
+class Population(object):
+    def __init__(self, name=None):
+        self._columns = OrderedDict.OrderedDict()
+        self._name = name
+
+    @property
+    def name(self):
+        return self._name
+
+    @name.setter
+    def name(self, name):
+        self._name = name
+
+    def add_individual(self, individual, alias=None):
+        if individual.column not in self._columns:
+            self._columns[individual.column] = individual
+        elif self._columns[individual.column] == individual:
+            # should should this be an error?
+            # should we replace the alias using this entry?
+            pass
+        else:
+            raise 'Duplicate column: {0}'.format(individual)
+
+    def is_superset(self, other):
+        for column, other_individual in other._columns.items():
+            our_individual = self._columns.get(column)
+            if our_individual is None or our_individual != other_individual:
+                return False
+        return True
+
+    def is_disjoint(self, other):
+        for column, our_individual in self._columns.items():
+            other_individual = other._columns.get(column)
+            if other_individual is not None and other_individual == our_individual:
+                return False
+        return True
+
+    def column_list(self):
+        return self._columns.keys()
+
+    def individual_with_column(self, column):
+        if column in self._columns:
+            return self._columns[column]
+        return None
+
+    def tag_list(self, delimiter=':'):
+        entries = []
+        for column, individual in self._columns.iteritems():
+            first_token = individual.name.split()[0]
+            entry = '{0}{1}{2}'.format(column, delimiter, first_token)
+            entries.append(entry)
+        return entries
+
+    def to_string(self, delimiter=':', separator=' ', replace_names_with=None):
+        entries = []
+        for column, individual in self._columns.items():
+            value = individual.name
+            if replace_names_with is not None:
+                value = replace_names_with
+            entry = '{0}{1}{2}'.format(column, delimiter, value)
+            entries.append(entry)
+        return separator.join(entries)
+
+    def __str__(self):
+        return self.to_string()
+
+    def from_population_file(self, filename):
+        with open(filename) as fh:
+            for line in fh:
+                line = line.rstrip('\r\n')
+                column, name, alias = line.split('\t')
+                alias = alias.strip()
+                individual = Individual(column, name)
+                if alias:
+                    individual.alias = alias
+                self.add_individual(individual)
+
+    def from_tag_list(self, tag_list):
+        for tag in tag_list:
+            column, name = tag.split(':')
+            individual = Individual(column, name)
+            self.add_individual(individual)
+
+    def from_wrapped_dict(self, wrapped_dict):
+        unwraped_dict = self.unwrap_dict(wrapped_dict)
+        for name, column in unwraped_dict.iteritems():
+            individual = Individual(column, name)
+            self.add_individual(individual)
+
+    def unwrap_dict(self, wrapped_dict):
+        decoded_value = self.decode_value(wrapped_dict)
+        decompressed_value = self.decompress_value(decoded_value)
+        def _decode_list(data):
+            rv = []
+            for item in data:
+                if isinstance(item, unicode):
+                    item = item.encode('utf-8')
+                elif isinstance(item, list):
+                    item = _decode_list(item)
+                elif isinstance(item, dict):
+                    item = _decode_dict(item)
+                rv.append(item)
+            return rv
+        def _decode_dict(data):
+            rv = {}
+            for key, value in data.iteritems():
+                if isinstance(key, unicode):
+                    key = key.encode('utf-8')
+                if isinstance(value, unicode):
+                    value = value.encode('utf-8')
+                elif isinstance(value, list):
+                    value = _decode_list(value)
+                elif isinstance(value, dict):
+                    value = _decode_dict(value)
+                rv[key] = value
+            return rv
+        unwrapped_dict = json.loads(decompressed_value, object_hook=_decode_dict)
+        return unwrapped_dict
+
+    def decode_value(self, value):
+        try:
+            return base64.b64decode(value)
+        except TypeError, message:
+            print >> sys.stderr, 'base64.b64decode: {0}: {1}'.format(message, value)
+            sys.exit(1)
+
+    def decompress_value(self, value):
+        try:
+            return zlib.decompress(value)
+        except zlib.error, message:
+            print >> sys.stderr, 'zlib.decompress: {0}'.format(message)
+            sys.exit(1)
+
+    def individual_names(self):
+        for column, individual in self._columns.items():
+            first_token = individual.name.split()[0]
+            yield first_token
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/README	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,3 @@
+The Genome Diversity tools require the following software:
+    ADMIXTURE  (we used version 1.22)  http://www.genetics.ucla.edu/software/admixture/
+    KING       (we used version 1.5)   http://people.virginia.edu/~wc9c/KING/
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/add_fst_column.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+
+import gd_util
+import sys
+from Population import Population
+
+################################################################################
+
+if len(sys.argv) != 13:
+    gd_util.die('Usage')
+
+input, p1_input, p2_input, input_type, data_source, min_reads, min_qual, retain, discard_fixed, biased, output, ind_arg = sys.argv[1:]
+
+p_total = Population()
+p_total.from_wrapped_dict(ind_arg)
+
+p1 = Population()
+p1.from_population_file(p1_input)
+if not p_total.is_superset(p1):
+    gd_util.die('There is an individual in population 1 that is not in the SNP table')
+
+p2 = Population()
+p2.from_population_file(p2_input)
+if not p_total.is_superset(p2):
+    gd_util.die('There is an individual in population 2 that is not in the SNP table')
+
+################################################################################
+
+prog = 'Fst_column'
+
+args = [ prog ]
+args.append(input)
+args.append(data_source)
+args.append(min_reads)
+args.append(min_qual)
+args.append(retain)
+args.append(discard_fixed)
+args.append(biased)
+
+columns = p1.column_list()
+for column in columns:
+    if input_type == 'gd_genotype':
+        column = int(column) - 2
+    args.append('{0}:1'.format(column))
+
+columns = p2.column_list()
+for column in columns:
+    if input_type == 'gd_genotype':
+        column = int(column) - 2
+    args.append('{0}:2'.format(column))
+
+with open(output, 'w') as fh:
+    gd_util.run_program(prog, args, stdout=fh)
+
+sys.exit(0)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/add_fst_column.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,175 @@
+<tool id="gd_add_fst_column" name="Per-SNP FSTs" version="1.2.0">
+  <description>: Compute a fixation index score for each SNP</description>
+
+  <command interpreter="python">
+    #import json
+    #import base64
+    #import zlib
+    #set $ind_names = $input.dataset.metadata.individual_names
+    #set $ind_colms = $input.dataset.metadata.individual_columns
+    #set $ind_dict = dict(zip($ind_names, $ind_colms))
+    #set $ind_json = json.dumps($ind_dict, separators=(',',':'))
+    #set $ind_comp = zlib.compress($ind_json, 9)
+    #set $ind_arg = base64.b64encode($ind_comp)
+    add_fst_column.py '$input' '$p1_input' '$p2_input'
+    #if $input_type.choice == '0'
+      'gd_snp' '$input_type.data_source.choice'
+      #if $input_type.data_source.choice == '0'
+        '$input_type.data_source.min_reads' '$input_type.data_source.min_qual'
+      #else if $input_type.data_source.choice == '1'
+        '0' '0'
+      #end if
+    #else if $input_type.choice == '1'
+      'gd_genotype' '1' '0' '0'
+    #end if
+    '$retain' '$discard_fixed' '$biased' '$output' '$ind_arg'
+  </command>
+
+  <inputs>
+    <conditional name="input_type">
+      <param name="choice" type="select" format="integer" label="Input format">
+        <option value="0" selected="true">gd_snp</option>
+        <option value="1">gd_genotype</option>
+      </param>
+
+      <when value="0">
+        <param name="input" type="data" format="gd_snp" label="SNP dataset" />
+
+        <conditional name="data_source">
+          <param name="choice" type="select" format="integer" label="Frequency metric">
+            <option value="0">sequence coverage</option>
+            <option value="1" selected="true">estimated genotype</option>
+          </param>
+          <when value="0">
+            <param name="min_reads" type="integer" min="0" value="0" label="Minimum total read count for a population" />
+            <param name="min_qual" type="integer" min="0" value="0" label="Minimum individual genotype quality" />
+          </when>
+          <when value="1"/>
+        </conditional>
+      </when>
+      <when value="1">
+        <param name="input" type="data" format="gd_genotype" label="Genotype dataset" />
+      </when>
+    </conditional>
+
+    <param name="p1_input" type="data" format="gd_indivs" label="Population 1 individuals" />
+    <param name="p2_input" type="data" format="gd_indivs" label="Population 2 individuals" />
+
+    <param name="retain" type="select" label="If a SNP is below minimum">
+      <option value="0" selected="true">skip SNP</option>
+      <option value="1">set FST = -1</option>
+    </param>
+
+    <param name="discard_fixed" type="select" label="For SNPs that appear to be fixed across both populations">
+      <option value="0">retain</option>
+      <option value="1" selected="true">delete</option>
+    </param>
+
+    <param name="biased" type="select" label="FST estimator">
+      <option value="0">Wright's original definition</option>
+      <option value="1">the Weir-Cockerham estimator</option>
+      <option value="2" selected="true">the Reich-Patterson estimator</option>
+    </param>
+
+  </inputs>
+
+  <outputs>
+    <data name="output" format="input" format_source="input" metadata_source="input" />
+  </outputs>
+
+  <requirements>
+    <requirement type="package" version="0.1">gd_c_tools</requirement>
+  </requirements>
+
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
+      <param name="p1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" />
+      <param name="p2_input" value="test_in/b.gd_indivs" ftype="gd_indivs" />
+      <param name="data_source" value="0" />
+      <param name="min_reads" value="3" />
+      <param name="min_qual" value="0" />
+      <param name="retain" value="0" />
+      <param name="discard_fixed" value="1" />
+      <param name="biased" value="0" />
+      <output name="output" file="test_out/add_fst_column/add_fst_column.gd_snp" />
+    </test>
+  </tests>
+
+  <help>
+
+**Dataset formats**
+
+The input datasets are in gd_snp_, gd_genotype_, and gd_indivs_ formats.
+The output dataset is in gd_snp_ or gd_genotype_ format.  (`Dataset missing?`_)
+
+.. _gd_snp: ./static/formatHelp.html#gd_snp
+.. _gd_genotype: ./static/formatHelp.html#gd_genotype
+.. _gd_indivs: ./static/formatHelp.html#gd_indivs
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+The user specifies a SNP table and two "populations" of individuals, both previously defined using the Galaxy tool to specify individuals from a SNP table. No individual can be in both populations. Other choices are as follows.
+
+Frequency metric. The allele frequencies of a SNP in the two populations can be estimated either by the total number of reads of each allele (if the table is in gd_snp format, but not with gd_genotype), or by adding the frequencies inferred from genotypes of individuals in the populations.
+
+After specifying the frequency metric, the user sets lower bounds on the amount of data required at a SNP. For estimating the Fst using read counts, the bound is the minimum count of reads of the two alleles in a population. For estimations based on genotype, the bound is the minimum reported genotype quality per individual.
+
+The user specifies whether the SNPs that violate the lower bound should be ignored or the Fst set to -1.
+
+The user specifies whether SNPs where both populations appear to be fixed for the same allele should be retained or discarded.
+
+Finally, the user chooses which definition of Fst to use: Wright's original definition, the Weir-Cockerham unbiased estimator, or the Reich-Patterson estimator.
+
+A column is appended to the SNP table giving the Fst for each retained SNP.
+
+References:
+
+Sewall Wright (1951) The genetical structure of populations. Ann Eugen 15:323-354.
+
+Weir, B.S. and Cockerham, C. Clark (1984) Estimating F-statistics for the analysis of population structure. Evolution 38:1358-1370.
+
+Weir, B.S. 1996. Population substructure. Genetic data analysis II, pp. 161-173. Sinauer Associates, Sunderland, MA.
+
+David Reich, Kumarasamy Thangaraj, Nick Patterson, Alkes L. Price, and Lalji Singh (2009) Reconstructing Indian population history. Nature 461:489-494, especially Supplement 2.
+
+Their effectiveness for computing FSTs when there are many SNPs but few individuals is discussed in the following paper.
+
+Eva-Maria Willing, Christine Dreyer, Cock van Oosterhout (2012) Estimates of genetic differentiation measured by FST do not necessarily require large sample sizes when using many SNP markers. PLoS One 7:e42649.
+
+-----
+
+**Example**
+
+- input, SNP table::
+
+   #{"column_names":["scaf","pos","A","B","qual","ref","rpos","rnuc","1A","1B","1G","1Q","2A","2B","2G","2Q","3A","3B","3G","3Q","4A","4B","4G","4Q",
+   #"5A","5B","5G","5Q","6A","6B","6G","6Q","pair","dist","prim","rflp"],"dbkey":"canFam2",
+   #"individuals":[["PB1",9],["PB2",13],["PB3",17],["PB4",21],["PB6",25],["PB8",29]],
+   #"pos":2,"rPos":7,"ref":6,"scaffold":1,"species":"bear"}
+   Contig161_chr1_4641264_4641879    115  C  T  73.5  chr1  4641382  C  6  0  2  45  8  0  2  51  15  0  2  72  5  0  2  42  6  0  2  45  10  0  2  57  Y  54   0.323  0
+   Contig113_chr5_11052263_11052603  28   C  T  38.2  chr5  11052280 C  1  2  1  12  3  2  1  10  5   0  2  42  2  1  2  13  3  0  2  36  8   0  2  51  Y  161  +99.   0
+   Contig215_chr5_70946445_70947428  363  T  G  28.2  chr5  70946809 C  4  0  2  39  0  5  0  12  9   0  2  54  6  0  2  45  3  3  2  1   9   0  2  54  N  43   0.153  0
+   etc.
+
+- input, Population 1 individuals::
+
+   9       PB1
+   13      PB2
+
+- input, Population 2 individuals::
+
+   17      PB3
+   21      PB4
+
+- output (minimum read count of 3, discard fixed)::
+
+   Contig113_chr5_11052263_11052603  28   C  T  38.2  chr5  11052280  C  1  2  1  12  3  2  1  10  5  0  2  42  2  1  2  13  3  0  2  36  8  0  2  51  Y  161  +99.   0  0.1636
+   Contig215_chr5_70946445_70947428  363  T  G  28.2  chr5  70946809  C  4  0  2  39  0  5  0  12  9  0  2  54  6  0  2  45  3  3  2  1   9  0  2  54  N  43   0.153  0  0.3846
+   etc.
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/aggregate_gd_indivs.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+
+import gd_util
+import sys
+from Population import Population
+
+################################################################################
+
+if len(sys.argv) != 6:
+    gd_util.dir('Usage')
+
+input, p1_input, output, input_type, ind_arg  = sys.argv[1:]
+
+p_total = Population()
+p_total.from_wrapped_dict(ind_arg)
+
+p1 = Population()
+p1.from_population_file(p1_input)
+
+if not p_total.is_superset(p1):
+    gd_util.die('There is an individual in the population that is not in the SNP table')
+
+################################################################################
+
+prog = 'aggregate'
+
+args = [ prog ]
+args.append(input)
+
+if input_type == 'gd_snp':
+    args.append(1)
+elif input_type == 'gd_genotype':
+    args.append(0)
+else:
+    die('unknown input type: {0}'.format(input_type))
+
+columns = p1.column_list()
+
+for column in sorted(columns):
+    if input_type == 'gd_genotype':
+        column = str(int(column) - 2)
+    args.append(column)
+
+with open(output, 'w') as fh:
+    gd_util.run_program(prog, args, stdout=fh)
+
+sys.exit(0)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/aggregate_gd_indivs.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,113 @@
+<tool id="gd_sum_gd_snp" name="Aggregate Individuals" version="1.1.0">
+  <description>: Append summary columns for a population</description>
+
+  <command interpreter="python">
+    #import json
+    #import base64
+    #import zlib
+    #set $ind_names = $input.dataset.metadata.individual_names
+    #set $ind_colms = $input.dataset.metadata.individual_columns
+    #set $ind_dict = dict(zip($ind_names, $ind_colms))
+    #set $ind_json = json.dumps($ind_dict, separators=(',',':'))
+    #set $ind_comp = zlib.compress($ind_json, 9)
+    #set $ind_arg = base64.b64encode($ind_comp)
+    aggregate_gd_indivs.py '$input' '$p1_input' '$output'
+    #if $input_type.choice == '0'
+      'gd_snp'
+    #else if $input_type.choice == '1'
+      'gd_genotype'
+    #end if
+    '$ind_arg'
+  </command>
+
+  <inputs>
+
+  <conditional name="input_type">
+    <param name="choice" type="select" format="integer" label="Input format">
+      <option value="0" selected="true">gd_snp</option>
+      <option value="1">gd_genotype</option>
+    </param>
+
+    <when value="0">
+      <param name="input" type="data" format="gd_snp" label="SNP dataset" />
+    </when>
+    <when value="1">
+      <param name="input" type="data" format="gd_genotype" label="Genotype dataset" />
+    </when>
+  </conditional>
+
+    <param name="p1_input" type="data" format="gd_indivs" label="Population individuals" />
+  </inputs>
+
+  <outputs>
+    <data name="output" format="input" format_source="input" metadata_source="input" />
+  </outputs>
+
+  <requirements>
+    <requirement type="package" version="0.1">gd_c_tools</requirement>
+  </requirements>
+
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
+      <param name="p1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" />
+      <output name="output" file="test_out/modify_snp_table/modify.gd_snp" />
+    </test>
+  </tests>
+
+  <help>
+
+**Dataset formats**
+
+The input datasets are in gd_snp_, gd_genotype_, and gd_indivs_ formats.
+The output dataset is in gd_snp_ or gd_genotype_ format.  (`Dataset missing?`_)
+
+.. _gd_snp: ./static/formatHelp.html#gd_snp
+.. _gd_genotype: ./static/formatHelp.html#gd_genotype
+.. _gd_indivs: ./static/formatHelp.html#gd_indivs
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+The user specifies that some of the individuals in a gd_snp or gd_genotype
+dataset form a "population", by supplying a list that has been previously
+created using the Specify Individuals tool.  The program appends a new
+"entity" (set of four columns for a gd_snp table, or one column for a
+gd_genotype table), analogous to the column(s) for an individual but
+containing summary data for the population as a group.  For a gd_snp
+table, these four columns give the total counts for the two alleles,
+the "genotype" for the population, and the maximum quality value, taken
+over all individuals in the population.  If all defined genotypes in
+the population are 2 (agree with the reference), then the population's
+genotype is 2, and similarly for 0; otherwise the genotype is 1 (unless
+all individuals have undefined genotype, in which case it is -1).
+For a gd_genotype file, only the aggregate genotype is appended.
+
+-----
+
+**Example**
+
+- input gd_snp::
+
+    Contig161_chr1_4641264_4641879   115  C  T  73.5   chr1   4641382  C   6  0  2  45   8  0  2  51   15  0  2  72   5  0  2  42   6  0  2  45   10  0  2  57   Y  54  0.323  0
+    Contig48_chr1_10150253_10151311   11  A  G  94.3   chr1  10150264  A   1  0  2  30   1  0  2  30    1  0  2  30   3  0  2  36   1  0  2  30    1  0  2  30   Y  22  +99.   0
+    Contig20_chr1_21313469_21313570   66  C  T  54.0   chr1  21313534  C   4  0  2  39   4  0  2  39    5  0  2  42   4  0  2  39   4  0  2  39    5  0  2  42   N   1  +99.   0
+    etc.
+
+- input individuals::
+
+    9   PB1
+    13  PB2
+    17  PB3
+
+- output::
+
+    Contig161_chr1_4641264_4641879   115  C  T  73.5   chr1   4641382  C   6  0  2  45   8  0  2  51   15  0  2  72   5  0  2  42   6  0  2  45   10  0  2  57   Y  54  0.323  0   29  0  2  72
+    Contig48_chr1_10150253_10151311   11  A  G  94.3   chr1  10150264  A   1  0  2  30   1  0  2  30    1  0  2  30   3  0  2  36   1  0  2  30    1  0  2  30   Y  22  +99.   0    3  0  2  30
+    Contig20_chr1_21313469_21313570   66  C  T  54.0   chr1  21313534  C   4  0  2  39   4  0  2  39    5  0  2  42   4  0  2  39   4  0  2  39    5  0  2  42   N   1  +99.   0   13  0  2  42
+    etc.
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/assignment_of_optimal_breeding_pairs.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,185 @@
+#!/usr/bin/env python2.6
+
+import sys
+import munkres
+import random
+
+class Vertex(object):
+    def __init__(self, name):
+        self.name = name
+        self.neighbors = {}
+        self.color = 0
+        self.explored = False
+
+    def add_neighbor(self, neighbor, weight=0.0):
+        if neighbor in self.neighbors:
+            if self.neighbors[neighbor] != weight:
+                die('multiple edges not supported')
+        else:
+            self.neighbors[neighbor] = weight
+
+class Graph(object):
+    def __init__(self):
+        self.vertex_list = {}
+        self.vertices = 0
+        self.max_weight = 0.0
+
+    def add_vertex(self, name):
+        if name not in self.vertex_list:
+            self.vertex_list[name] = Vertex(name)
+            self.vertices += 1
+        return self.vertex_list[name]
+
+    def add_edge(self, name1, name2, weight):
+        vertex1 = self.add_vertex(name1)
+        vertex2 = self.add_vertex(name2)
+        vertex1.add_neighbor(vertex2, weight)
+        vertex2.add_neighbor(vertex1, weight)
+        self.max_weight = max(self.max_weight, weight)
+
+    def from_edge_file(self, filename):
+        fh = try_open(filename)
+        line_number = 0
+        for line in fh:
+            line_number += 1
+            line = line.rstrip('\r\n')
+            elems = line.split()
+            if len(elems) < 3:
+                die('too few columns on line {0} of {1}:\n{2}'.format(line_number, filename, line))
+            name1 = elems[0]
+            name2 = elems[1]
+            weight = float_value(elems[2])
+            if weight is None:
+                die('invalid weight on line {0} of {1}:\n{2}'.format(line_number, filename, line))
+            self.add_edge(name1, name2, weight)
+        fh.close()
+
+    def bipartite_partition(self):
+        vertices_left = self.vertex_list.values()
+
+        while vertices_left:
+            fifo = [vertices_left[0]]
+            while fifo:
+                vertex = fifo.pop(0)
+                if not vertex.explored:
+                    vertex.explored = True
+                    vertices_left.remove(vertex)
+
+                    if vertex.color == 0:
+                        vertex.color = 1
+                        neighbor_color = 2
+                    elif vertex.color == 1:
+                        neighbor_color = 2
+                    elif vertex.color == 2:
+                        neighbor_color = 1
+
+                    for neighbor in vertex.neighbors:
+                        if neighbor.color == 0:
+                            neighbor.color = neighbor_color
+                        elif neighbor.color != neighbor_color:
+                            return None, None
+                        fifo.append(neighbor)
+
+        c1 = []
+        c2 = []
+
+        for vertex in self.vertex_list.values():
+            if vertex.color == 1:
+                c1.append(vertex)
+            elif vertex.color == 2:
+                c2.append(vertex)
+
+        return c1, c2
+
+def try_open(*args):
+    try:
+        return open(*args)
+    except IOError:
+        die('Failed opening file: {0}'.format(args[0]))
+
+def float_value(token):
+    try:
+        return float(token)
+    except ValueError:
+        return None
+
+def die(message):
+    """Write message to stderr and terminate the process with exit status 1."""
+    print >> sys.stderr, message
+    sys.exit(1)
+
+def main(input, randomizations, output):
+    graph = Graph()
+    graph.from_edge_file(input)
+    c1, c2 = graph.bipartite_partition()
+
+    if c1 is None:
+        die('Graph is not bipartite')
+
+    if len(c1) + len(c2) != graph.vertices:
+        die('Bipartite partition failed: {0} + {1} != {2}'.format(len(c1), len(c2), graph.vertices))
+
+    with open(output, 'w') as ofh:
+        a1 = optimal_assignment(c1, c2, graph.max_weight)
+        optimal_total_weight = 0.0
+        for a in a1:
+            optimal_total_weight += a[0].neighbors[a[1]]
+
+        print >> ofh, 'optimal average {0:.3f}'.format(optimal_total_weight / len(a1))
+
+        if randomizations > 0:
+            random_total_count = 0
+            random_total_weight = 0.0
+            for i in range(randomizations):
+                a2 = random_assignment(c1, c2)
+                random_total_count += len(a2)
+                for a in a2:
+                    random_total_weight += a[0].neighbors[a[1]]
+            print >> ofh, 'random average {0:.3f}'.format(random_total_weight / random_total_count)
+
+
+        for a in a1:
+            print >> ofh, '\t'.join([a[0].name, a[1].name])
+
+def optimal_assignment(c1, c2, max_weight):
+    matrix = []
+    assignment = []
+
+    for v1 in c1:
+        row = []
+        for v2 in c2:
+            row.append(max_weight + 1.0 - v1.neighbors[v2])
+        matrix.append(row)
+
+    m = munkres.Munkres()
+    indexes = m.compute(matrix)
+    for row, column in indexes:
+        assignment.append([c1[row], c2[column]])
+
+    return assignment
+
+def random_assignment(c1, c2):
+    assignment = []
+
+    ## note, this assumes that graph is complete bipartite
+    ## this needs to be fixed
+    c1_len = len(c1)
+    c2_len = len(c2)
+    idx_list = list(range(max(c1_len, c2_len)))
+    random.shuffle(idx_list)
+
+    if c1_len <= c2_len:
+        for i, v1 in enumerate(c1):
+            assignment.append([v1, c2[idx_list[i]]])
+    else:
+        for i, v1 in enumerate(c2):
+            assignment.append([v1, c1[idx_list[i]]])
+
+    return assignment
+
+################################################################################
+
+if len(sys.argv) != 4:
+    die('Usage')
+
+input, randomizations, output = sys.argv[1:]
+main(input, int(randomizations), output)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/assignment_of_optimal_breeding_pairs.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,54 @@
+<tool id="gd_assignment_of_optimal_breeding_pairs" name="Matings" version="1.0.0">
+  <description>: Assignment of optimal breeding pairs</description>
+
+  <command interpreter="python">
+    assignment_of_optimal_breeding_pairs.py '$input' '$randomizations' '$output'
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="txt" label="Pairs dataset" />
+    <param name="randomizations" type="integer" min="0" value="0" label="Randomizations" />
+  </inputs>
+
+  <outputs>
+    <data name="output" format="txt" />
+  </outputs>
+
+  <requirements>
+    <requirement type="package" version="1.0.5.4">munkres</requirement>
+  </requirements>
+
+  <!--
+  <tests>
+  </tests>
+  -->
+
+  <help>
+
+**Dataset formats**
+
+The input and output datasets are in text_ format.
+
+.. _text: ./static/formatHelp.html#text
+
+The pairs dataset consists of lines of the form::
+
+   name1  name2  prob
+
+as generated by either of the &quot;Offspring estimated heterozygosity&quot; tools.
+
+-----
+
+**What it does**
+
+The user supplies the offspring estimated heterozygosity for every
+potential breeding pair, i.e., the expected fraction of autosomal SNPs
+for which an offspring is heterozygous.  The tool assigns breeding
+pairs to maximize the average estimated heterozygosity of the offspring.
+Optionally, the user can specify a number of randomly assigned pairings,
+for which the program reports the average estimated heterozygosity
+of the offspring; this gives a comparison of the optimal and average
+heterozygosity resulting from an assignment of breeding pairs.
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/average_fst.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+
+import gd_util
+import sys
+from Population import Population
+
+################################################################################
+
+if len(sys.argv) != 12:
+    gd_util.die('Usage')
+
+input, p1_input, p2_input, input_type, data_source, min_total_count, discard_fixed, output, shuffles, p0_input, ind_arg = sys.argv[1:]
+
+try:
+    shuffle_count = int(shuffles)
+except:
+    shuffle_count = 0
+
+p_total = Population()
+p_total.from_wrapped_dict(ind_arg)
+
+p1 = Population()
+p1.from_population_file(p1_input)
+if not p_total.is_superset(p1):
+    gd_util.die('There is an individual in population 1 that is not in the SNP table')
+
+p2 = Population()
+p2.from_population_file(p2_input)
+if not p_total.is_superset(p2):
+    gd_util.die('There is an individual in population 2 that is not in the SNP table')
+
+p0 = None
+if shuffle_count > 0:
+    p0 = Population()
+    p0.from_population_file(p0_input)
+    if not p_total.is_superset(p0):
+        gd_util.die('There is an individual in population 0 that is not in the SNP table')
+
+################################################################################
+
+prog = 'Fst_ave'
+
+args = [ prog ]
+args.append(input)
+args.append(data_source)
+args.append(min_total_count)
+args.append(discard_fixed)
+args.append(shuffles)
+
+columns = p1.column_list()
+for column in columns:
+    if input_type == 'gd_genotype':
+        column = int(column) - 2
+    args.append('{0}:1'.format(column))
+
+columns = p2.column_list()
+for column in columns:
+    if input_type == 'gd_genotype':
+        column = int(column) - 2
+    args.append('{0}:2'.format(column))
+
+if p0 is not None:
+    columns = p0.column_list()
+    for column in columns:
+        if input_type == 'gd_genotype':
+            column = int(column) - 2
+        args.append('{0}:0'.format(column))
+
+with open(output, 'w') as fh:
+    gd_util.run_program(prog, args, stdout=fh)
+
+sys.exit(0)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/average_fst.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,168 @@
+<tool id="gd_average_fst" name="Overall FST" version="1.3.0">
+  <description>: Estimate the relative fixation index between two populations</description>
+
+  <command interpreter="python">
+    #import json
+    #import base64
+    #import zlib
+    #set $ind_names = $input.dataset.metadata.individual_names
+    #set $ind_colms = $input.dataset.metadata.individual_columns
+    #set $ind_dict = dict(zip($ind_names, $ind_colms))
+    #set $ind_json = json.dumps($ind_dict, separators=(',',':'))
+    #set $ind_comp = zlib.compress($ind_json, 9)
+    #set $ind_arg = base64.b64encode($ind_comp)
+    average_fst.py '$input' '$p1_input' '$p2_input'
+    #if $input_type.choice == '0'
+      'gd_snp' '$input_type.data_source.choice'
+      #if $input_type.data_source.choice == '0'
+        '$input_type.data_source.min_value'
+      #else if $input_type.data_source.choice == '1'
+        '1'
+      #end if
+    #else if $input_type.choice == '1'
+      'gd_genotype' '1' '1'
+    #end if
+    '$discard_fixed' '$output'
+    #if $use_randomization.choice == '0'
+      '0' '/dev/null'
+    #else if $use_randomization.choice == '1'
+      '$use_randomization.shuffles' '$use_randomization.p0_input'
+    #end if
+    '$ind_arg'
+  </command>
+
+  <inputs>
+    <conditional name="input_type">
+      <param name="choice" type="select" format="integer" label="Input format">
+        <option value="0" selected="true">gd_snp</option>
+        <option value="1">gd_genotype</option>
+      </param>
+
+      <when value="0">
+        <param name="input" type="data" format="gd_snp" label="SNP dataset" />
+
+        <conditional name="data_source">
+          <param name="choice" type="select" format="integer" label="Frequency metric">
+            <option value="0">sequence coverage</option>
+            <option value="1" selected="true">estimated genotype</option>
+          </param>
+
+          <when value="0">
+            <param name="min_value" type="integer" min="1" value="1" label="Minimum total read count for a population" />
+          </when>
+
+          <when value="1"/>
+        </conditional>
+      </when>
+
+      <when value="1">
+        <param name="input" type="data" format="gd_genotype" label="Genotype dataset" />
+      </when>
+    </conditional>
+
+    <param name="p1_input" type="data" format="gd_indivs" label="Population 1 individuals" />
+    <param name="p2_input" type="data" format="gd_indivs" label="Population 2 individuals" />
+
+    <param name="discard_fixed" type="select" label="For SNPs that appear to be fixed across both populations">
+      <option value="0">retain</option>
+      <option value="1" selected="true">delete</option>
+    </param>
+
+    <conditional name="use_randomization">
+      <param name="choice" type="select" format="integer" label="Use randomization">
+        <option value="0" selected="true">no</option>
+        <option value="1">yes</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <param name="shuffles" type="integer" min="0" value="0" label="Shuffles" />
+        <param name="p0_input" type="data" format="gd_indivs" label="Individuals for randomization" />
+      </when>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <data name="output" format="txt" />
+  </outputs>
+
+  <requirements>
+    <requirement type="package" version="0.1">gd_c_tools</requirement>
+  </requirements>
+
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
+      <param name="p1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" />
+      <param name="p2_input" value="test_in/b.gd_indivs" ftype="gd_indivs" />
+      <param name="ds_choice" value="0" />
+      <param name="min_value" value="3" />
+      <param name="discard_fixed" value="1" />
+      <param name="choice" value="0" />
+      <output name="output" file="test_out/average_fst/average_fst.txt" />
+    </test>
+  </tests>
+
+  <help>
+
+**Dataset formats**
+
+The input datasets are in gd_snp_, gd_genotype_, and gd_indivs_ formats.
+The output dataset is in text_ format.  (`Dataset missing?`_)
+
+.. _gd_snp: ./static/formatHelp.html#gd_snp
+.. _gd_genotype: ./static/formatHelp.html#gd_genotype
+.. _gd_indivs: ./static/formatHelp.html#gd_indivs
+.. _text: ./static/formatHelp.html#text
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+The user specifies a SNP table and two "populations" of individuals, both previously defined using the Galaxy tool to specify individuals from a SNP table. No individual can be in both populations. Other choices are as follows.
+
+Frequency metric. The allele frequencies of a SNP in the two populations can be estimated either by the total number of reads of each allele (if the table is in gd_snp format, but not with gd_genotype), or by adding the frequencies inferred from genotypes of individuals in the populations.
+
+After specifying the frequency metric, the user sets lower bounds on amount of data required at a SNP. For estimating the FST using read counts, the bound is the minimum count of reads of the two alleles in a population. For estimations based on genotype, the bound is the minimum reported genotype quality per individual. SNPs not meeting these lower bounds are ignored.
+
+The user specifies whether SNPs where both populations appear to be fixed for the same allele should be retained or discarded.
+
+Finally, the user decides whether to use randomizations. If so, then the user specifies how many randomly generated population pairs (retaining the numbers of individuals of the originals) to generate, as well as the "population" of additional individuals (not in the first two populations) that can be used in the randomization process.
+
+The program prints the following measures of FST for the two populations.
+
+1. The Reich-Patterson estimator (average over FSTs for all SNPs).
+2. The population-based Reich-Patterson estimator.
+3. The formulation by Sewall Wright (average over FSTs for all SNPs).
+4. The Weir-Cockerham estimator (average over FSTs for all SNPs).
+
+If randomizations were requested, it prints a summary for each of the four definitions of FST that includes the maximum and average value, and the highest-scoring population pair (if any scored higher than the two user-specified populations).
+
+References:
+
+Sewall Wright (1951) The genetical structure of populations. Ann Eugen 15:323-354.
+
+Weir, B.S. and Cockerham, C. Clark (1984) Estimating F-statistics for the analysis of population structure. Evolution 38:1358-1370.
+
+Weir, B.S. 1996. Population substructure. Genetic data analysis II, pp. 161-173. Sinauer Associates, Sunderland, MA.
+
+David Reich, Kumarasamy Thangaraj, Nick Patterson, Alkes L. Price, and Lalji Singh (2009) Reconstructing Indian population history. Nature 461:489-494, especially Supplement 2.
+
+Their effectiveness for computing FSTs when there are many SNPs but few individuals is discussed in the following paper.
+
+Eva-Maria Willing, Christine Dreyer, Cock van Oosterhout (2012) Estimates of genetic differentiation measured by FST do not necessarily require large sample sizes when using many SNP markers. PLoS One 7:e42649.
+
+-----
+
+**Example**
+
+- output::
+
+   Using 37847 SNPs, we compute:
+   Average Reich-Patterson FST is 0.31012.
+   The population-based Reich-Patterson Fst is 0.33625.
+   Average Wright FST is 0.22810.
+   Average Weir-Cockerham FST is 0.30813.
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/calclenchange.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,280 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+#       calclenchange.py
+#
+#       Copyright 2011 Oscar Bedoya-Reina <oscar@niska.bx.psu.edu>
+#
+#       This program is free software; you can redistribute it and/or modify
+#       it under the terms of the GNU General Public License as published by
+#       the Free Software Foundation; either version 2 of the License, or
+#       (at your option) any later version.
+#
+#       This program is distributed in the hope that it will be useful,
+#       but WITHOUT ANY WARRANTY; without even the implied warranty of
+#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#       GNU General Public License for more details.
+#
+#       You should have received a copy of the GNU General Public License
+#       along with this program; if not, write to the Free Software
+#       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+#       MA 02110-1301, USA.
+
+import argparse,mechanize,os,sys
+from decimal import Decimal,getcontext
+from xml.etree.ElementTree import ElementTree,tostring
+import networkx as nx
+from copy import copy
+
+#method to rank the the pthways by mut. freq.
+def rankdN(ltfreqs):
+	ordvals=sorted(ltfreqs)#sort and reverse freqs.
+	#~
+	outrnk=[]
+	tmpChng0,tmpOri,tmpMut,tmpPthw=ordvals.pop()#the highest possible value
+	if tmpOri=='C':
+		if tmpMut!='C':
+			tmpChng0='C-%s'%tmpMut
+		else:
+			tmpChng0=Decimal('0')
+	crank=1
+	outrnk.append([str(tmpChng0),str(tmpOri),str(tmpMut),str(crank),tmpPthw])
+	totalnvals=len(ordvals)
+	cnt=0
+	while totalnvals>cnt:
+		cnt+=1
+		tmpChng,tmpOri,tmpMut,tmpPthw=ordvals.pop()
+		if tmpOri=='C':
+			if tmpMut!='C':
+				tmpChng='C-%s'%tmpMut
+			else:
+				tmpChng=Decimal('0')
+		if tmpChng!=tmpChng0:
+			crank=len(outrnk)+1
+			tmpChng0=tmpChng
+		outrnk.append([str(tmpChng),str(tmpOri),str(tmpMut),str(crank),tmpPthw])
+	return outrnk
+
+#method to rank the the pthways by mut. freq.
+def rankdAvr(ltfreqs):
+	ordvals=sorted(ltfreqs)#sort and reverse freqs.
+	#~
+	outrnk={}
+	tmpChng0,tmpOri,tmpMut,tmpPthw=ordvals.pop()#the highest possible value
+	if tmpOri=='I':
+		if tmpMut!='I':
+			tmpChng0='I-%s'%tmpMut
+		else:
+			tmpChng0=Decimal('0')
+	crank=1
+	outrnk[tmpPthw]='\t'.join([str(tmpChng0),str(tmpOri),str(tmpMut),str(crank)])
+	totalnvals=len(ordvals)
+	cnt=0
+	while totalnvals>cnt:
+		cnt+=1
+		tmpChng,tmpOri,tmpMut,tmpPthw=ordvals.pop()
+		if tmpOri=='I':
+			if tmpMut!='I':
+				tmpChng='I-%s'%tmpMut
+			else:
+				tmpChng=Decimal('0')
+		if tmpChng!=tmpChng0:
+			crank=len(outrnk)+1
+			tmpChng0=tmpChng
+		outrnk[tmpPthw]='\t'.join([str(tmpChng),str(tmpOri),str(tmpMut),str(crank)])
+	return outrnk
+
+#this method takes as input a list of pairs of edges(beginNod,endNod) and returns a list of nodes with indegree 0 and outdegree 0
+def returnstartanendnodes(edges):
+	listID0st=set()#starts
+	listOD0en=set()#end
+	for beginNod,endNod in edges:# O(n)
+		listID0st.add(beginNod)
+		listOD0en.add(endNod)
+	startNdsID0=listID0st.difference(listOD0en)
+	endNdsOD0=listOD0en.difference(listID0st)
+	return startNdsID0,endNdsOD0
+
+#~ Method to return nodes and edges
+def returnNodesNEdgesfKXML(fpthwKGXML):
+	#~
+	tree = ElementTree()
+	ptree=tree.parse(fpthwKGXML)
+	#~
+	title=ptree.get('title')
+	prots=ptree.findall('entry')
+	reactns=ptree.findall('reaction')
+	#~
+	edges,ndstmp=set(),set()
+	nreactns=len(reactns)
+	cr=0#count reacts
+	while nreactns>cr:
+		cr+=1
+		reactn=reactns.pop()
+		mainid=reactn.get('id')
+		ndstmp.add(mainid)#add node
+		reacttyp=reactn.get('type')
+		sbstrts=reactn.findall('substrate')
+		while len(sbstrts)>0:
+			csbstrt=sbstrts.pop()
+			csbtsid=csbstrt.get('id')
+			ndstmp.add(csbtsid)#add node
+			if reacttyp=='irreversible':
+				edges.add((csbtsid,mainid))#add edges
+			elif reacttyp=='reversible':
+				edges.add((mainid,csbtsid))#add edges
+				edges.add((csbtsid,mainid))#add edges
+		#~
+		prdcts=reactn.findall('product')
+		while len(prdcts)>0:
+			prdct=prdcts.pop()
+			prodctid=prdct.get('id')
+			ndstmp.add(prodctid)#add node
+			if reacttyp=='irreversible':
+				edges.add((mainid,prodctid))#add edges
+			elif reacttyp=='reversible':
+				edges.add((mainid,prodctid))#add edges
+				edges.add((prodctid,mainid))#add edges
+	#~ Nodes
+	nprots=len(prots)
+	cp=0#count prots
+	dnodes={}
+	while nprots>cp:
+		cp+=1
+		prot=prots.pop()
+		tmpProtnm=prot.get('id')
+		if tmpProtnm in ndstmp:
+			dnodes[prot.get('id')]=set(prot.get('name').split())#each genename for each Id
+	return dnodes,edges,title
+
+#~ make calculation on pathways
+def rtrnAvrgLen(edges,strNds,endNds):
+	wG=nx.DiGraph()#reference graph
+	wG.add_edges_from(edges)
+	dPairsSrcSnks=nx.all_pairs_shortest_path_length(wG)#dictionary between sources and sink and length
+	nstartNdsID0=len(strNds)
+	cstrtNds=0
+	nPaths=0
+	lPathLen=[]
+	while nstartNdsID0>cstrtNds:
+		cStartNd=strNds.pop()#current start node
+		dEndNdsLen=dPairsSrcSnks.pop(cStartNd)
+		for cendNd in dEndNdsLen:
+			if cendNd in endNds:
+				lPathLen.append(dEndNdsLen[cendNd])
+				nPaths+=1
+		cstrtNds+=1
+	AvrgPthLen=0
+	if nPaths!=0:
+		AvrgPthLen=Decimal(sum(lPathLen))/Decimal(str(nPaths))
+	return nPaths,AvrgPthLen
+
+def main():
+	parser = argparse.ArgumentParser(description='Rank pathways based on the change in length and number of paths connecting sources and sinks.')
+	parser.add_argument('--loc_file',metavar='correlational database',type=str,help='correlational database')
+	parser.add_argument('--species',metavar='species name',type=str,help='the species of interest in loc_file')
+	parser.add_argument('--output',metavar='output TXT file',type=str,help='the output file with the table in txt format. Column 1 is the diference between column 2 and column 3, Column 2 is the pathway average length (between sources and sinks) including the genes in the input list, Column 3 is the pathway average length EXCLUDING the genes in the input list, Column 4 is the rank based on column 1. Column 5 is the diference between column 6 and column 7, Column 6 is the number of paths between sources and sinks, including the genes in the input list, Column 7 is the number of paths between sources and sinks EXCLUDING the genes in the input list, Column 8 is the rank based on column 5. Column 9 I the pathway name' )
+	parser.add_argument('--posKEGGclmn',metavar='column number',type=int,help='the column with the KEGG pathway code/name')
+	parser.add_argument('--KEGGgeneposcolmn',metavar='column number',type=int,help='column with the KEGG gene code')
+	parser.add_argument('--input',metavar='input TXT file',type=str,help='the input file with the table in txt format')
+	#~
+	#~Open arguments
+	class C(object):
+		pass
+	fulargs=C()
+	parser.parse_args(sys.argv[1:],namespace=fulargs)
+	#test input vars
+	inputf,loc_file,species,output,posKEGGclmn,Kgeneposcolmn=fulargs.input,fulargs.loc_file,fulargs.species,fulargs.output,fulargs.posKEGGclmn,fulargs.KEGGgeneposcolmn
+	posKEGGclmn-=1#correct pos
+	Kgeneposcolmn-=1
+	#~ Get the extra variables
+	crDB=[x.split() for x in open(loc_file).read().splitlines() if x.split()[0]==species][0]
+	sppPrefx,dinput=crDB[1],crDB[2]
+	#~ set decimal positions
+	getcontext().prec = 3
+	#make a dictionary of valid genes
+	dKEGGcPthws=dict([(x.split('\t')[Kgeneposcolmn],set([y.split('=')[0] for y in x.split('\t')[posKEGGclmn].split('.')])) for x in open(inputf).read().splitlines()[1:] if x.strip()])
+	sdGenes=set([x for x in dKEGGcPthws.keys() if x.find('.')>-1])
+	while True:#to crrect names with more than one gene
+		try:
+			mgenes=sdGenes.pop()
+			pthwsAssotd=dKEGGcPthws.pop(mgenes)
+			mgenes=mgenes.split('.')
+			for eachg in mgenes:
+				dKEGGcPthws[eachg]=pthwsAssotd
+		except:
+			break
+	#~
+	lPthwsF=[x for x in os.listdir(dinput) if x.find('.xml')>-1 if x not in ['cfa04070.xml']]
+	nPthws=len(lPthwsF)
+	cPthw=0
+	lPthwPthN=[]#the output list for number of paths
+	lPthwPthAvr=[]#the output list for the length of paths
+	#~
+	while cPthw<nPthws:
+		cPthw+=1
+		KEGGpathw=lPthwsF.pop()
+		comdKEGGpathw=KEGGpathw.split('.')[0]
+		tmpddGenrcgenPresent=set()
+		sKEGGc=dKEGGcPthws.keys()
+		lsKEGGc=len(sKEGGc)
+		ctPthw=0
+		while ctPthw < lsKEGGc:#to save memory
+			eachK=sKEGGc.pop()
+			alPthws=dKEGGcPthws[eachK]
+			if comdKEGGpathw in alPthws:
+				tmpddGenrcgenPresent.add(':'.join([sppPrefx,eachK]))
+			ctPthw+=1
+		#~ Make graph calculations
+		dnodes,edges,title=returnNodesNEdgesfKXML(open(os.path.join(dinput,KEGGpathw)))
+		startNdsID0,endNdsOD0=returnstartanendnodes(edges)
+		startNdsOri=copy(startNdsID0)
+		#~
+		nPaths='C'#stands for circuit
+		AvrgPthLen='I'#stand for infinite
+		if len(startNdsID0)>0 and len(endNdsOD0)>0:
+			nPaths,AvrgPthLen=rtrnAvrgLen(edges,startNdsID0,endNdsOD0)
+		#~ work with the genes in the list
+		genestodel=set()
+		lnodes=len(dnodes)
+		sNds=set(dnodes)
+		ctPthw=0
+		while ctPthw<lnodes:
+			ctPthw+=1
+			cNod=sNds.pop()
+			sgenes=dnodes.pop(cNod)
+			if len(sgenes.intersection(tmpddGenrcgenPresent))==len(sgenes):
+				genestodel.add(cNod)
+		#~ del nodes from graph edges
+		wnPaths,wAvrgPthLen=copy(nPaths),copy(AvrgPthLen)
+		if len(genestodel)>0:
+			wedges=set([x for x in edges if len(set(x).intersection(genestodel))==0])
+			wstartNds,wendNds=returnstartanendnodes(wedges)
+			if nPaths!='C':
+				wstartNds=[x for x in wstartNds if x in startNdsOri]
+				wendNds=[x for x in wendNds if x in endNdsOD0]
+			if len(wstartNds)>0 and len(wendNds)>0:
+				wnPaths,wAvrgPthLen=rtrnAvrgLen(wedges,wstartNds,wendNds)
+		#~ Calculate the differences
+		orNP,mutNP,oriLen,mutLen=nPaths,wnPaths,AvrgPthLen,wAvrgPthLen
+		if nPaths=='C':
+			orNP=Decimal('1000')
+			oriLen=Decimal('1000')
+		if wnPaths=='C':
+			mutNP=Decimal('1000')
+			mutLen=Decimal('1000')
+		lPthwPthN.append([orNP-mutNP,nPaths,wnPaths,'='.join([comdKEGGpathw,title])])#print nPaths,AvrgPthLen
+		lPthwPthAvr.append([oriLen-mutLen,AvrgPthLen,wAvrgPthLen,'='.join([comdKEGGpathw,title])])#print nPaths,AvrgPthLen
+	doutrnkPthN=rankdN(lPthwPthN)
+	doutrnkPthAvr=rankdAvr(lPthwPthAvr)
+	#~
+	sall=['\t'.join([doutrnkPthAvr[x[4]],'\t'.join(x)]) for x in doutrnkPthN]
+	salef=open(output,'w')
+	salef.write('\n'.join(sall))
+	salef.close()
+	return 0
+
+
+if __name__ == '__main__':
+	main()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/cdblib.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,230 @@
+#!/usr/bin/env python
+
+'''
+Manipulate DJB's Constant Databases. These are 2 level disk-based hash tables
+that efficiently handle many keys, while remaining space-efficient.
+
+    http://cr.yp.to/cdb.html
+
+When generated databases are only used with Python code, consider using hash()
+rather than djb_hash() for a tidy speedup.
+'''
+
+from _struct import Struct
+from itertools import chain
+
+
+def py_djb_hash(s):
+    '''Return the value of DJB's hash function for the given 8-bit string.'''
+    h = 5381
+    for c in s:
+        h = (((h << 5) + h) ^ ord(c)) & 0xffffffff
+    return h
+
+try:
+    from _cdblib import djb_hash
+except ImportError:
+    djb_hash = py_djb_hash
+
+read_2_le4 = Struct('<LL').unpack
+write_2_le4 = Struct('<LL').pack
+
+
+class Reader(object):
+    '''A dictionary-like object for reading a Constant Database accessed
+    through a string or string-like sequence, such as mmap.mmap().'''
+
+    def __init__(self, data, hashfn=djb_hash):
+        '''Create an instance reading from a sequence and using hashfn to hash
+        keys.'''
+        if len(data) < 2048:
+            raise IOError('CDB too small')
+
+        self.data = data
+        self.hashfn = hashfn
+
+        self.index = [read_2_le4(data[i:i+8]) for i in xrange(0, 2048, 8)]
+        self.table_start = min(p[0] for p in self.index)
+        # Assume load load factor is 0.5 like official CDB.
+        self.length = sum(p[1] >> 1 for p in self.index)
+
+    def iteritems(self):
+        '''Like dict.iteritems(). Items are returned in insertion order.'''
+        pos = 2048
+        while pos < self.table_start:
+            klen, dlen = read_2_le4(self.data[pos:pos+8])
+            pos += 8
+
+            key = self.data[pos:pos+klen]
+            pos += klen
+
+            data = self.data[pos:pos+dlen]
+            pos += dlen
+
+            yield key, data
+
+    def items(self):
+        '''Like dict.items().'''
+        return list(self.iteritems())
+
+    def iterkeys(self):
+        '''Like dict.iterkeys().'''
+        return (p[0] for p in self.iteritems())
+    __iter__ = iterkeys
+
+    def itervalues(self):
+        '''Like dict.itervalues().'''
+        return (p[1] for p in self.iteritems())
+
+    def keys(self):
+        '''Like dict.keys().'''
+        return [p[0] for p in self.iteritems()]
+
+    def values(self):
+        '''Like dict.values().'''
+        return [p[1] for p in self.iteritems()]
+
+    def __getitem__(self, key):
+        '''Like dict.__getitem__().'''
+        value = self.get(key)
+        if value is None:
+            raise KeyError(key)
+        return value
+
+    def has_key(self, key):
+        '''Return True if key exists in the database.'''
+        return self.get(key) is not None
+    __contains__ = has_key
+
+    def __len__(self):
+        '''Return the number of records in the database.'''
+        return self.length
+
+    def gets(self, key):
+        '''Yield values for key in insertion order.'''
+        # Truncate to 32 bits and remove sign.
+        h = self.hashfn(key) & 0xffffffff
+        start, nslots = self.index[h & 0xff]
+
+        if nslots:
+            end = start + (nslots << 3)
+            slot_off = start + (((h >> 8) % nslots) << 3)
+
+            for pos in chain(xrange(slot_off, end, 8),
+                             xrange(start, slot_off, 8)):
+                rec_h, rec_pos = read_2_le4(self.data[pos:pos+8])
+
+                if not rec_h:
+                    break
+                elif rec_h == h:
+                    klen, dlen = read_2_le4(self.data[rec_pos:rec_pos+8])
+                    rec_pos += 8
+
+                    if self.data[rec_pos:rec_pos+klen] == key:
+                        rec_pos += klen
+                        yield self.data[rec_pos:rec_pos+dlen]
+
+    def get(self, key, default=None):
+        '''Get the first value for key, returning default if missing.'''
+        # Avoid exception catch when handling default case; much faster.
+        return chain(self.gets(key), (default,)).next()
+
+    def getint(self, key, default=None, base=0):
+        '''Get the first value for key converted it to an int, returning
+        default if missing.'''
+        value = self.get(key, default)
+        if value is not default:
+            return int(value, base)
+        return value
+
+    def getints(self, key, base=0):
+        '''Yield values for key in insertion order after converting to int.'''
+        return (int(v, base) for v in self.gets(key))
+
+    def getstring(self, key, default=None, encoding='utf-8'):
+        '''Get the first value for key decoded as unicode, returning default if
+        not found.'''
+        value = self.get(key, default)
+        if value is not default:
+            return value.decode(encoding)
+        return value
+
+    def getstrings(self, key, encoding='utf-8'):
+        '''Yield values for key in insertion order after decoding as
+        unicode.'''
+        return (v.decode(encoding) for v in self.gets(key))
+
+
+class Writer(object):
+    '''Object for building new Constant Databases, and writing them to a
+    seekable file-like object.'''
+
+    def __init__(self, fp, hashfn=djb_hash):
+        '''Create an instance writing to a file-like object, using hashfn to
+        hash keys.'''
+        self.fp = fp
+        self.hashfn = hashfn
+
+        fp.write('\x00' * 2048)
+        self._unordered = [[] for i in xrange(256)]
+
+    def put(self, key, value=''):
+        '''Write a string key/value pair to the output file.'''
+        assert type(key) is str and type(value) is str
+
+        pos = self.fp.tell()
+        self.fp.write(write_2_le4(len(key), len(value)))
+        self.fp.write(key)
+        self.fp.write(value)
+
+        h = self.hashfn(key) & 0xffffffff
+        self._unordered[h & 0xff].append((h, pos))
+
+    def puts(self, key, values):
+        '''Write more than one value for the same key to the output file.
+        Equivalent to calling put() in a loop.'''
+        for value in values:
+            self.put(key, value)
+
+    def putint(self, key, value):
+        '''Write an integer as a base-10 string associated with the given key
+        to the output file.'''
+        self.put(key, str(value))
+
+    def putints(self, key, values):
+        '''Write zero or more integers for the same key to the output file.
+        Equivalent to calling putint() in a loop.'''
+        self.puts(key, (str(value) for value in values))
+
+    def putstring(self, key, value, encoding='utf-8'):
+        '''Write a unicode string associated with the given key to the output
+        file after encoding it as UTF-8 or the given encoding.'''
+        self.put(key, unicode.encode(value, encoding))
+
+    def putstrings(self, key, values, encoding='utf-8'):
+        '''Write zero or more unicode strings to the output file. Equivalent to
+        calling putstring() in a loop.'''
+        self.puts(key, (unicode.encode(value, encoding) for value in values))
+
+    def finalize(self):
+        '''Write the final hash tables to the output file, and write out its
+        index. The output file remains open upon return.'''
+        index = []
+        for tbl in self._unordered:
+            length = len(tbl) << 1
+            ordered = [(0, 0)] * length
+            for pair in tbl:
+                where = (pair[0] >> 8) % length
+                for i in chain(xrange(where, length), xrange(0, where)):
+                    if not ordered[i][0]:
+                        ordered[i] = pair
+                        break
+
+            index.append((self.fp.tell(), length))
+            for pair in ordered:
+                self.fp.write(write_2_le4(*pair))
+
+        self.fp.seek(0)
+        for pair in index:
+            self.fp.write(write_2_le4(*pair))
+        self.fp = None # prevent double finalize()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/cluster_kegg.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,46 @@
+<tool id="gd_cluster_kegg" name="Cluster KEGG" version="1.0.0">
+  <description>: Group gene categories connected by shared genes</description>
+
+  <command interpreter="python">
+    #set $ensembltcolmn_arg = int(str($ensembltcolmn)) - 1
+    cluster_onConnctdComps.py '--input=$input' '--input_columns=${input.dataset.metadata.columns}' '--outfile=$output' '--threshold=$threshold' '--ENSEMBLTcolmn=$ensembltcolmn_arg' '--classClmns=$classclmns'
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="tabular" label="Input dataset" />
+    <param name="ensembltcolmn" type="data_column" data_ref="input" numerical="false" label="Column with the ENSEMBL code in the Input dataset" />
+    <param name="threshold" type="float" value="90" min="0" max="100" label="Threshold to disconnect the nodes" />
+    <param name="classclmns" size="10" type="text" value="c1,c2" label="Gene category columns"/>
+
+  </inputs>
+
+  <outputs>
+    <data name="output" format="tabular" />
+  </outputs>
+
+  <requirements>
+    <requirement type="package" version="1.8.1">networkx</requirement>
+  </requirements>
+
+  <help>
+**What it does**
+
+The program builds a network of gene categories connected by shared
+genes.  The edges of this network are weighted based on the number of
+genes that each node shares.  The clustering coefficient, c\ :sub:`u`\ , is then calculated for each node using the formula:
+
+.. image:: $PATH_TO_IMAGES/cluster_kegg_formula.png
+
+|
+
+where deg(u) is the degree of u and edge weights, w\ :sub:`uv`\ ,
+are normalized by the maximum weight in the network.  The cluster
+coefficients are then filtered by our program based on a threshold (that
+could be a percentile or a value chosen by the user) and all the nodes
+with a cluster coefficient lower than this threshold are deleted from
+the network.  Finally, the program reports each connected component as
+a cluster of gene classifications.  With our program a lower number of
+gene categories is obtained, but the results are easier to interpret as
+they exclude genes present in many gene groups.
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/cluster_onConnctdComps.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,223 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+#       Cluster_GOKEGG.py
+#
+#       Copyright 2013 Oscar Reina <oscar@niska.bx.psu.edu>
+#
+#       This program is free software; you can redistribute it and/or modify
+#       it under the terms of the GNU General Public License as published by
+#       the Free Software Foundation; either version 2 of the License, or
+#       (at your option) any later version.
+#
+#       This program is distributed in the hope that it will be useful,
+#       but WITHOUT ANY WARRANTY; without even the implied warranty of
+#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#       GNU General Public License for more details.
+#
+#       You should have received a copy of the GNU General Public License
+#       along with this program; if not, write to the Free Software
+#       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+#       MA 02110-1301, USA.
+
+import argparse
+import os
+from networkx import connected_components,Graph,clustering
+from numpy import percentile
+from decimal import Decimal,getcontext
+from itertools import permutations,combinations
+import sys
+
+def rtrnClustrsOnCltrCoff(dNodesWeightMin,threshold,perctile=True):
+	"""
+	From a file with three columns: nodeA, nodeB and a score, it returns
+	the strong and weak connected components produced when the edges
+	below the percentage threshold (or value) are excluded.
+	"""
+	#~
+	Gmin = Graph()
+	for nodeA,nodeB in dNodesWeightMin:
+		wMin=dNodesWeightMin[nodeA,nodeB]
+		Gmin.add_edge(nodeA,nodeB,weight=wMin)
+	#~
+	clstrCoffcMin=clustering(Gmin,weight='weight')
+	#~
+	if perctile:
+		umbralMin=percentile(clstrCoffcMin.values(),threshold)
+	else:
+		umbralMin=threshold
+	#~
+	GminNdsRmv=[x for x in clstrCoffcMin if clstrCoffcMin[x]<umbralMin]
+	#~
+	Gmin.remove_nodes_from(GminNdsRmv)
+	#~
+	dTermCmptNumbWkMin=rtrndata(Gmin)
+	#~
+	salelClustr=[]
+	srtdterms=sorted(dTermCmptNumbWkMin.keys())
+	for echTerm in srtdterms:
+		try:
+			MinT=dTermCmptNumbWkMin[echTerm]
+		except:
+			MinT='-'
+		salelClustr.append('\t'.join([echTerm,MinT]))
+	#~
+	return salelClustr
+
+def rtrndata(G):
+	"""
+	returna list of terms and its clustering, as well as clusters from
+	a networkx formatted file.
+	"""
+	#~
+	cntCompnts=0
+	dTermCmptNumbWk={}
+	for echCompnt in connected_components(G):
+		cntCompnts+=1
+		#print '.'.join(echCompnt)
+		for echTerm in echCompnt:
+			dTermCmptNumbWk[echTerm]=str(cntCompnts)
+	#~
+	return dTermCmptNumbWk
+
+def rtrnCATcENSEMBLc(inCATfile,classClmns,ENSEMBLTcolmn,nonHdr=True):
+	"""
+	return a dictionary of all the categories in an input file with
+	a set of genes. Takes as input a file with categories an genes.
+	"""
+	dCAT={}
+	dENSEMBLTCAT={}
+	for eachl in open(inCATfile,'r'):
+		if nonHdr and eachl.strip():
+			ENSEMBLT=eachl.splitlines()[0].split('\t')[ENSEMBLTcolmn]
+			sCAT=set()
+			for CATcolmn in classClmns:
+				sCAT.update(set([x for x in eachl.splitlines()[0].split('\t')[CATcolmn].split('.')]))
+			sCAT=sCAT.difference(set(['','U','N']))
+			if len(sCAT)>0:
+				dENSEMBLTCAT[ENSEMBLT]=sCAT
+			for CAT in sCAT:
+				try:
+					dCAT[CAT].add(ENSEMBLT)
+				except:
+					dCAT[CAT]=set([ENSEMBLT])
+		nonHdr=True
+	#~
+	dCAT=dict([(x,len(dCAT[x])) for x in dCAT.keys()])
+	#~
+	return dCAT,dENSEMBLTCAT
+
+
+def calcDistance(sCAT1,sCAT2):
+	"""
+	takes as input two set of genesin different categories and returns
+	a value 1-percentage of gene shared cat1->cat2, and cat2->cat1.
+	"""
+	getcontext().prec=5
+	lgensS1=Decimal(len(sCAT1))
+	lgensS2=Decimal(len(sCAT2))
+	shrdGns=sCAT1.intersection(sCAT2)
+	lenshrdGns=len(shrdGns)
+	#~
+	dC1C2=1-(lenshrdGns/lgensS1)
+	dC2C1=1-(lenshrdGns/lgensS2)
+	#~
+	return dC1C2,dC2C1
+
+def rtnPrwsdtncs(dCAT,dENSEMBLTCAT):
+	"""
+	return a mcl formated pairwise distances from a list of categories
+	"""
+	#~
+	getcontext().prec=5
+	dCATdst={}
+	lENSEMBL=dENSEMBLTCAT.keys()
+	l=len(lENSEMBL)
+	c=0
+	for ENSEMBL in lENSEMBL:
+		c+=1
+		lCAT=dENSEMBLTCAT.pop(ENSEMBL)
+		for CAT1,CAT2 in combinations(lCAT, 2):
+			try:
+				dCATdst[CAT1,CAT2]+=1
+			except:
+				dCATdst[CAT1,CAT2]=1
+			try:
+				dCATdst[CAT2,CAT1]+=1
+			except:
+				dCATdst[CAT2,CAT1]=1
+	#~
+	dNodesWeightMin={}
+	l=len(dCATdst)
+	for CAT1,CAT2 in dCATdst.keys():
+		shrdGns=dCATdst.pop((CAT1,CAT2))
+		dC1C2=float(shrdGns)
+		nodeA,nodeB=sorted([CAT1,CAT2])
+		try:
+			cscor=dNodesWeightMin[nodeA,nodeB]
+			if cscor>=dC1C2:
+				dNodesWeightMin[nodeA,nodeB]=dC1C2
+		except:
+			dNodesWeightMin[nodeA,nodeB]=dC1C2
+	#
+	return dNodesWeightMin
+
+def parse_class_columns(val, max_col):
+	int_list = []
+
+	for elem in [x.strip() for x in val.split(',')]:
+		if elem[0].lower() != 'c':
+			print >> sys.stderr, "bad column format:", elem
+			sys.exit(1)
+
+        int_val = as_int(elem[1:])
+
+        if int_val is None:
+			print >> sys.stderr, "bad column format:", elem
+			sys.exit(1)
+        elif not 1 <= int_val <= max_col:
+			print >> sys.stderr, "column out of range:", elem
+			sys.exit(1)
+
+        int_list.append(int_val - 1)
+
+	return int_list
+
+def as_int(val):
+    try:
+        return int(val)
+    except ValueError:
+        return None
+    else:
+        raise
+
+def main():
+	"""
+	"""
+	#~ bpython cluster_onConnctdComps.py --input=../conctFinal_CEU.tsv --outfile=../borrar.txt --threshold=90 --ENSEMBLTcolmn=1 --classClmns='20 22'
+	parser = argparse.ArgumentParser(description='Returns the count of genes in ...')
+	parser.add_argument('--input',metavar='input TXT file',type=str,help='the input file with the table in txt format.',required=True)
+	parser.add_argument('--input_columns',metavar='input INT value',type=int,help='the number of columns in the input file.',required=True)
+	parser.add_argument('--outfile',metavar='input TXT file',type=str,help='the output file with the connected components.',required=True)
+	parser.add_argument('--threshold',metavar='input FLOAT value',type=float,help='the threshold to disconnect the nodes.',required=True)
+	parser.add_argument('--ENSEMBLTcolmn',metavar='input INT file',type=int,help='the column with the ENSEMBLE code in the input.',required=True)
+	parser.add_argument('--classClmns',metavar='input STR value',type=str,help='the list of columns with the gene categories separated by space.',required=True)
+	args = parser.parse_args()
+	infile = args.input
+	threshold = args.threshold
+	outfile = args.outfile
+	ENSEMBLTcolmn = args.ENSEMBLTcolmn
+	classClmns = parse_class_columns(args.classClmns, args.input_columns)
+	#~
+	dCAT,dENSEMBLTCAT=rtrnCATcENSEMBLc(infile,classClmns,ENSEMBLTcolmn)
+	dNodesWeightMin=rtnPrwsdtncs(dCAT,dENSEMBLTCAT)
+	salelClustr=rtrnClustrsOnCltrCoff(dNodesWeightMin,threshold)
+	#~
+	with open(outfile, 'w') as salef:
+		print >> salef, '\n'.join(salelClustr)
+	#~
+	#~
+
+if __name__ == '__main__':
+	main()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/commits.log	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,77 @@
+
+:2fb0cd69fe08
+cathy  2013-07-08  15:21
+New versions of Rank Pathways and Rank Terms (code from Oscar, 16 May 2013).
+Rank Pathways still needs updates in the help text and sample input/output,
+and both tools need test datasets.
+
+:6255a0a7fad5
+cathy  2013-05-10  15:45
+Bumped version number of the Pathway Image tool.
+
+:45ed8c76cabf
+cathy  2013-05-10  15:07
+Fix from Oscar to handle changes in the KEGG Mapper web form.
+
+:ea75d4a4ded0
+cathy  2013-03-04  16:04
+- documented the new Restore Attributes tool, and tweaked its description and UI
+- adjusted doc for the new Rank Terms tool, and tweaked its description and UI
+- added doc paragraph for new ability of Filter SNPs to handle % thresholds
+- tweaked description and doc for Rank Pathways and PCA
+- bumped version numbers for Filter SNPs and Remarkable Intervals, due to
+  new functionality and bug fix, respectively
+
+:b63c3675e0a3
+cathy  2013-02-04  18:31
+Edited the README to instruct users to copy the files in static/images
+into their Galaxy installation.
+
+:f556345a4185
+cathy  2012-11-02  17:45
+Tweaked parameter labels.
+
+:8703e16fca01
+cathy  2012-10-04  11:42
+More changes by Cathy, mostly in Phylogenetic Tree and Prepare Input,
+including some UI adjustments.
+
+:7b775e5b68b4
+cathy  2012-09-28  00:55
+Galaxy didn't like my RST syntax.  :-/
+
+:9b5b4f73bd98
+cathy  2012-09-28  00:08
+Tweaks by Cathy, e.g. adjusting text where renamed tools are mentioned.
+Also riemerized through first section, "Initial Analysis".
+
+:93eeef51be96
+cathy  2012-09-27  14:03
+Fixed datatype bugs in the Filter SNPs and Aggregate Individuals tools.
+
+:119e1e904cc4
+cathy  2012-09-26  15:38
+Restored modify_snp_table.py from the archive, since it's still used by the
+Filter SNPs and Aggregate Individuals tools.
+
+:cc508d55cc9d
+cathy  2012-09-26  15:16
+Tweaked description for the Prepare Input tool.
+
+:cdb8430b1659
+cathy  2012-09-26  15:10
+Added ": " at the beginning of each description to separate it from the name.
+
+:3286bdea6b3d
+cathy  2012-09-26  13:01
+Clarified tool names and descriptions.
+
+:8a9bdfc0d31f
+cathy  2012-09-19  17:15
+Edited docs in aggregate_gd_indivs.xml: clarified "What it does", reformatted
+example data.
+
+:f7c6a18af605
+cathy  2012-09-19  11:31
+Edited docs in specify.xml: clarified "What it does", reformatted example data.
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/coverage_distributions.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,145 @@
+#!/usr/bin/env python
+
+import gd_util
+import os
+import sys
+from Population import Population
+import gd_composite
+
+################################################################################
+
+if len(sys.argv) < 7:
+    gd_util.die('Usage')
+
+input, data_source, output, extra_files_path, ind_arg = sys.argv[1:6]
+
+population_info = []
+p1_input = None
+all_individuals = False
+
+for arg in sys.argv[6:]:
+    if arg == 'all_individuals':
+        all_individuals = True
+    elif len(arg) > 12 and arg[:12] == 'individuals:':
+        p1_input = arg[12:]
+    elif len(arg) > 11 and arg[:11] == 'population:':
+        file, name = arg[11:].split(':', 1)
+        population_info.append((file, name))
+
+p_total = Population()
+p_total.from_wrapped_dict(ind_arg)
+
+################################################################################
+
+gd_util.mkdir_p(extra_files_path)
+
+################################################################################
+
+prog = 'coverage'
+
+args = [ prog ]
+args.append(input)
+args.append(data_source)
+
+user_coverage_file = os.path.join(extra_files_path, 'coverage.txt')
+args.append(user_coverage_file)
+
+population_list = []
+
+if all_individuals:
+    tags = p_total.tag_list()
+elif p1_input is not None:
+    p1 = Population()
+    this_pop = Population()
+    this_pop.from_population_file(p1_input)
+    population_list.append(this_pop)
+    p1.from_population_file(p1_input)
+    if not p_total.is_superset(p1):
+        gd_util.die('There is an individual in the population that is not in the SNP table')
+    tags = p1.tag_list()
+else:
+    tags = []
+    for population_file, population_name in population_info:
+        population = Population()
+        this_pop = Population()
+        this_pop.from_population_file(population_file)
+        population_list.append(this_pop)
+        population.from_population_file(population_file)
+        if not p_total.is_superset(population):
+            gd_util.die('There is an individual in the {} population that is not in the SNP table'.format(population_name))
+        columns = population.column_list()
+        for column in columns:
+            tags.append('{0}:{1}'.format(column, population_name))
+
+for tag in tags:
+    args.append(tag)
+
+## text output
+coverage_file = 'coverage.txt'
+with open(coverage_file, 'w') as fh:
+    gd_util.run_program(prog, args, stdout=fh)
+
+## graphical output
+coverage2_file = 'coverage2.txt'
+with open(coverage_file) as fh, open(coverage2_file, 'w') as ofh:
+    for line in fh:
+        line = line.rstrip('\r\n')
+        elems = line.split('\t')
+        name = elems.pop(0)
+        values = [ elems[0] ]
+        for idx in range(1, len(elems)):
+            val = str(float(elems[idx]) - float(elems[idx-1]))
+            values.append(val)
+        print >> ofh, '{0}\t{1}'.format(name, '\t'.join(values))
+
+################################################################################
+
+prog = 'Rscript'
+
+args = [ prog ]
+
+_realpath = os.path.realpath(__file__)
+_script_dir = os.path.dirname(_realpath)
+r_script_file = os.path.join(_script_dir, 'coverage_plot.r')
+args.append(r_script_file)
+
+pdf_file = os.path.join(extra_files_path, 'coverage.pdf')
+args.append(pdf_file)
+
+gd_util.run_program(prog, args)
+
+################################################################################
+
+info_page = gd_composite.InfoPage()
+info_page.set_title('Coverage distributions Galaxy Composite Dataset')
+
+display_file = gd_composite.DisplayFile()
+display_value = gd_composite.DisplayValue()
+
+out_pdf = gd_composite.Parameter(name='coverage.pdf', value='coverage.pdf', display_type=display_file)
+out_txt = gd_composite.Parameter(name='coverage.txt', value='coverage.txt', display_type=display_file)
+
+info_page.add_output_parameter(out_pdf)
+info_page.add_output_parameter(out_txt)
+
+if data_source == '0':
+    data_source_value = 'sequence coverage'
+elif data_source == '1':
+    data_source_value = 'estimated genotype'
+
+in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value)
+
+info_page.add_input_parameter(in_data_source)
+
+if population_list:
+    misc_populations =  gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList())
+    info_page.add_misc(misc_populations)
+else:
+    misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList())
+    info_page.add_misc(misc_individuals)
+
+with open (output, 'w') as ofh:
+    print >> ofh, info_page.render()
+
+sys.exit(0)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/coverage_distributions.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,131 @@
+<tool id="gd_coverage_distributions" name="Coverage Distributions" version="1.0.0">
+  <description>: Examine sequence coverage for SNPs</description>
+
+  <command interpreter="python">
+    #import json
+    #import base64
+    #import zlib
+    #set $ind_names = $input.dataset.metadata.individual_names
+    #set $ind_colms = $input.dataset.metadata.individual_columns
+    #set $ind_dict = dict(zip($ind_names, $ind_colms))
+    #set $ind_json = json.dumps($ind_dict, separators=(',',':'))
+    #set $ind_comp = zlib.compress($ind_json, 9)
+    #set $ind_arg = base64.b64encode($ind_comp)
+    coverage_distributions.py '$input' '0' '$output' '$output.files_path' '$ind_arg'
+    #if $individuals.choice == '0'
+      'all_individuals'
+    #else if $individuals.choice == '1'
+      #set $arg = 'individuals:%s' % str($individuals.p1_input)
+        '$arg'
+    #else if $individuals.choice == '2'
+      #for $population in $individuals.populations
+        #set $arg = 'population:%s:%s' % (str($population.p_input), str($population.p_input.name))
+        '$arg'
+      #end for
+    #end if
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="gd_snp" label="SNP dataset" />
+
+    <conditional name="individuals">
+      <param name="choice" type="select" label="Compute for">
+        <option value="0" selected="true">All individuals</option>
+        <option value="1">Individuals in a population</option>
+        <option value="2">Totals of populations</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <param name="p1_input" type="data" format="gd_indivs" label="Population individuals" />
+      </when>
+      <when value="2">
+        <repeat name="populations" title="Population" min="1">
+          <param name="p_input" type="data" format="gd_indivs" label="individuals" />
+        </repeat>
+      </when>
+    </conditional>
+
+    <!--
+    <param name="data_source" type="select" label="Data source">
+      <option value="0" selected="true">Sequence coverage</option>
+      <option value="1">Genotype quality</option>
+    </param>
+    -->
+  </inputs>
+
+  <outputs>
+    <data name="output" format="html" />
+  </outputs>
+
+  <requirements>
+    <requirement type="package" version="0.1">gd_c_tools</requirement>
+  </requirements>
+
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
+      <param name="choice" value="0" />
+      <output name="output" file="test_out/coverage_distributions/coverage.html" ftype="html" compare="diff" lines_diff="2">
+        <extra_files type="file" name="coverage.pdf" value="test_out/coverage_distributions/coverage.pdf" compare="sim_size" delta = "1000"/>
+        <extra_files type="file" name="coverage.txt" value="test_out/coverage_distributions/coverage.txt" />
+      </output>
+    </test>
+  </tests>
+
+  <help>
+
+**Dataset formats**
+
+The input dataset is in gd_snp_ format.
+The output is a composite dataset, containing both a text table and a PDF plot.
+(`Dataset missing?`_)
+
+.. _gd_snp: ./static/formatHelp.html#gd_snp
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+This tool reports distributions of a SNP reliability indicator, in this case
+sequence coverage, for individuals or populations.
+The coverage can be computed for all individuals, a subset of individuals,
+or totals for populations defined by the Specify Individuals tool.
+The results are reported as a text table giving the cumulative distributions,
+and as a plot.
+
+-----
+
+**Example**
+
+- input::
+
+    chr1  14929  A  G  999    21  30  1  127   7  11   1  28   7  29   0   5   2  5   1  17  10  14  1  81   17  74  1   42  15  22  1  125   29  84  1   88   6  10  1  11  30  23  1  79  19  1  2  71  24  0   2   99  41  10   2    2
+    chr1  17451  C  T  6.88  119   1  2  255  12   0   2  63  35   0   2  59  14  0   2  72  19   1  2  57  101   1  2  255  38   8  1   20  125   0  2  255  13   0  2  62  42   0  2  51  44  0  2  64  26  0   2  108  59   0   2  194
+    chr1  30922  G  T  999     0  23  0   66   0   0  -1   0   0   0  -1   0   0  0  -1   0   0   2  0   3    0  14  0   39  14  16  1  153    0  45  0  132   6   0  2  48  19   0  2  87   3  0  2  32   0  0  -1   0    0   0  -1    0
+    etc.
+
+- text output::
+
+                0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19
+     John West  0  0  0  0  0  0  0  0  1  1  1  1  2  2  3  3  4  4  5  6
+       NA12892  0  2  5 11 20 31 43 55 67 77 84 90 93 96 97 98 99 99 99 99
+       NA12891  0  0  0  0  0  1  1  2  3  5  6  9 11 15 19 23 29 35 41 47
+       NA12249  1  4 11 23 38 54 68 79 88 93 96 98 99 99 99 99 99 99 99 99
+       NA12342  0  0  1  1  2  4  6  9 13 18 23 29 36 43 50 58 65 71 77 82
+           KB1  0  0  0  0  0  0  0  0  0  0  0  0  1  1  1  1  1  1  2  2
+           ABT  0  0  0  0  0  0  1  1  1  2  3  4  5  6  8 10 12 14 18 21
+       NA18507  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  1
+       NA19238  0  0  0  1  2  4  6 10 14 19 25 32 39 47 55 62 69 76 81 86
+       NA19239  0  0  0  0  1  1  2  4  5  8 11 15 19 24 31 37 44 51 58 65
+            YH  2  4  6  7  8  8  9 10 11 12 14 17 19 22 25 29 32 36 40 45
+        KOREAN  0  0  1  1  3  4  5  7 10 12 15 19 22 27 31 37 42 48 54 60
+           JPT  0  0  0  0  0  0  0  0  1  1  1  2  2  3  4  5  7  8 10 12
+           etc.
+
+graphical output:
+
+.. image:: $PATH_TO_IMAGES/gd_coverage.png
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/coverage_plot.r	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,32 @@
+# Plot one line per row of coverage2.txt into a PDF.
+# The only command-line argument is the path of the PDF to create.
+args <- commandArgs(TRUE);
+output_file <- args[1];
+
+# Tab-separated table read from the working directory; its first line is
+# skipped.  Column 1 supplies the legend names, the rest are plotted values.
+x <- read.table('coverage2.txt', skip=1, sep='\t');
+
+# One row per plotted line.
+individuals <- dim(x)[1];
+# Value columns are plotted against x positions 0..max_cov, hence columns - 2.
+max_cov <- dim(x)[2] - 2;
+# Largest value outside the first column; values are divided by 100 when plotted.
+max_val <- max(x[-1]) / 100;
+colors <- rainbow(individuals);
+
+line_width = 3;
+# Transposed so that column i of xt holds row i of x (name first, then values).
+xt = t(x);
+
+xvals <- c(0:max_cov);
+# First row of x: drop the name, convert to numbers and scale by 1/100.
+values <- as.numeric(as.vector(xt[,1][-1]))/100;
+
+pdf(file=output_file, onefile=TRUE, width=10, height=6);
+
+# The first row sets up the axes; remaining rows are added with lines().
+plot(xvals, values, type='l', ylim=c(0, max_val), xlim=c(0, max_cov), col=colors[1], lwd=line_width, xlab="Coverage", ylab="Proportion");
+
+# Guarded because 2:individuals would count downwards when there is one row.
+if (individuals > 1) {
+    for (i in 2:individuals) {
+        values <- as.numeric(as.vector(xt[,i][-1]))/100;
+        lines(xvals, values, col=colors[i], lwd=line_width);
+    }
+}
+
+
+# Legend entries come from the first column, in the same order as the colors.
+names <- as.vector(t(x[1]));
+legend(x='topright', legend=names, fill=colors, bty='n');
+
+dev.off();
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/cp.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,11 @@
+#!/usr/bin/env python
+
+import shutil
+import sys
+
+if len(sys.argv) != 3:
+    print >> sys.stderr, 'Usage: %s <src> <dst>' % sys.argv[0]
+    sys.exit(1)
+
+src, dst =  sys.argv[1:3]
+shutil.copy2(src, dst)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/datatypes_conf.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,15 @@
+<?xml version="1.0"?>
+<datatypes>
+  <datatype_files>
+    <datatype_file name="wsf.py"/>
+  </datatype_files>
+  <registration>
+    <datatype extension="gd_indivs" type="galaxy.datatypes.wsf:Individuals" display_in_upload="true"/>
+    <datatype extension="gd_ped" type="galaxy.datatypes.wsf:Wped" display_in_upload="true"/>
+    <datatype extension="gd_snp" type="galaxy.datatypes.wsf:GDSnp" display_in_upload="true"/>
+    <datatype extension="gd_genotype" type="galaxy.datatypes.wsf:GDGenotype" display_in_upload="true"/>
+    <datatype extension="gd_sap" type="galaxy.datatypes.wsf:GDSap" display_in_upload="true"/>
+    <datatype extension="gd_covered_cds" type="galaxy.datatypes.interval:Interval" subclass="true" display_in_upload="true"/>
+  </registration>
+  <sniffers/>
+</datatypes>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/discover_familial_relationships.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,100 @@
+#!/usr/bin/env python
+
+import sys
+import gd_util
+
+from Population import Population
+
+################################################################################
+
+if len(sys.argv) != 6:
+    gd_util.die('Usage')
+
+input, input_type, ind_arg, pop_input, output = sys.argv[1:]
+
+p_total = Population()
+p_total.from_wrapped_dict(ind_arg)
+
+p1 = Population()
+p1.from_population_file(pop_input)
+if not p_total.is_superset(p1):
+    gd_util.die('There is an individual in the population that is not in the SNP table')
+
+################################################################################
+
+prog = 'kinship_prep'
+
+args = [ prog ]
+args.append(input)  # a Galaxy SNP table
+args.append(0)      # required number of reads for each individual to use a SNP
+args.append(0)      # required genotype quality for each individual to use a SNP
+args.append(0)      # minimum spacing between SNPs on the same scaffold
+
+for tag in p1.tag_list():
+    if input_type == 'gd_genotype':
+        column, name = tag.split(':')
+        tag = '{0}:{1}'.format(int(column) - 2, name)
+    args.append(tag)
+
+gd_util.run_program(prog, args)
+
+# kinship.map
+# kinship.ped
+# kinship.dat
+
+################################################################################
+
+prog = 'king'
+
+args = [ prog ]
+args.append('-d')
+args.append('kinship.dat')
+args.append('-p')
+args.append('kinship.ped')
+args.append('-m')
+args.append('kinship.map')
+args.append('--kinship')
+
+gd_util.run_program(prog, args)
+
+# king.kin
+
+################################################################################
+
+valid_header = 'FID\tID1\tID2\tN_SNP\tZ0\tPhi\tHetHet\tIBS0\tKinship\tError\n'
+
+with open('king.kin') as fh:
+    header = fh.readline()
+    if header != valid_header:
+        gd_util.die('crap')
+
+    with open(output, 'w') as ofh:
+
+        for line in fh:
+            elems = line.split('\t')
+            if len(elems) != 10:
+                gd_util.die('crap')
+
+            x = elems[1]
+            y = elems[2]
+            z = elems[8]
+
+            f = float(z)
+
+            message = ''
+
+            if f > 0.354:
+                message = 'duplicate or MZ twin'
+            elif f >= 0.177:
+                message = '1st degree relatives'
+            elif f >= 0.0884:
+                message = '2nd degree relatives'
+            elif f >= 0.0442:
+                message = '3rd degree relatives'
+
+            print >> ofh, '\t'.join([x, y, z, message])
+
+################################################################################
+
+sys.exit(0)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/discover_familial_relationships.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,67 @@
+<tool id="gd_discover_familial_relationships" name="Close relatives" version="1.0.0">
+  <description>: Discover familial relationships</description>
+
+  <command interpreter="python">
+    #import json
+    #import base64
+    #import zlib
+    #set $ind_names = $input.dataset.metadata.individual_names
+    #set $ind_colms = $input.dataset.metadata.individual_columns
+    #set $ind_dict = dict(zip($ind_names, $ind_colms))
+    #set $ind_json = json.dumps($ind_dict, separators=(',',':'))
+    #set $ind_comp = zlib.compress($ind_json, 9)
+    #set $ind_arg = base64.b64encode($ind_comp)
+    discover_familial_relationships.py '$input' '$input.ext' '$ind_arg' '$pop_input' '$output'
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="gd_snp,gd_genotype" label="Input dataset" />
+    <param name="pop_input" type="data" format="gd_indivs" label="Individuals dataset" />
+  </inputs>
+
+  <outputs>
+    <data name="output" format="tabular" />
+  </outputs>
+
+  <requirements>
+    <requirement type="package" version="0.1">gd_c_tools</requirement>
+  </requirements>
+
+  <!--
+  <tests>
+  </tests>
+  -->
+
+  <help>
+
+**Dataset formats**
+
+The input datasets are in gd_snp_, gd_genotype_, and gd_indivs_ formats.
+The output dataset is in tabular_ format.
+
+.. _gd_snp: ./static/formatHelp.html#gd_snp
+.. _gd_genotype: ./static/formatHelp.html#gd_genotype
+.. _gd_indivs: ./static/formatHelp.html#gd_indivs
+.. _tabular: ./static/formatHelp.html#tab
+
+-----
+
+**What it does**
+
+The user specifies a SNP table (either gd_snp or gd_genotype format) and
+a set of individuals.  The command runs the KING program (Manichaikul et
+al., 2010) to look for pairs of distinct individuals in the specified
+set that have a close family relationship.  Putatively related pairs
+are classified into five categories:
+
+  1. duplicate or MZ twin
+  2. 1st degree relatives -- siblings (other than identical twins) or parent-child
+  3. 2nd degree relatives -- e.g., half-siblings, grandparent-grandchild pair, individual-uncle/aunt pair
+  4. 3rd degree relatives -- e.g., first cousins
+  5. unrelated
+
+Reference:
+
+Manichaikul A, Mychaleckyj JC, Rich SS, Daly K, Sale M, Chen WM (2010) Robust relationship inference in genome-wide association studies. Bioinformatics 26: 2867-2873.
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/diversity_pi.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+
+import gd_util
+import sys
+from Population import Population
+
+################################################################################
+
+def load_pop(file, wrapped_dict):
+    if file == '/dev/null':
+        pop = None
+    else:
+        pop = Population()
+        pop.from_wrapped_dict(wrapped_dict)
+    return pop
+
+def append_tags(the_list, p, p_type, val):
+    if p is None:
+        return
+    for tag in p.tag_list():
+        column, name = tag.split(':')
+        if p_type == 'gd_genotype':
+            column = int(column) - 2
+        the_list.append('{0}:{1}:{2}'.format(val, column, name))
+
+################################################################################
+
+if len(sys.argv) != 11:
+    gd_util.die('Usage')
+
+snp_input, snp_ext, snp_arg, cov_input, cov_ext, cov_arg, indiv_input, min_coverage, req_thresh, output = sys.argv[1:]
+
+p_snp = load_pop(snp_input, snp_arg)
+p_cov = load_pop(cov_input, cov_arg)
+
+p_ind = Population()
+p_ind.from_population_file(indiv_input)
+
+if not p_snp.is_superset(p_ind):
+  gd_util.die('There is an individual in the population individuals that is not in the SNP/Genotype table')
+
+if p_cov is not None and (not p_cov.is_superset(p_ind)):
+  gd_util.die('There is an individual in the population individuals that is not in the Coverage table')
+
+################################################################################
+
+prog = 'mito_pi'
+
+args = [ prog ]
+args.append(snp_input)
+args.append(cov_input)
+args.append(min_coverage)
+args.append(req_thresh)
+
+append_tags(args, p_ind, 'gd_indivs', 0)
+append_tags(args, p_snp, snp_ext, 1)
+append_tags(args, p_cov, cov_ext, 2)
+
+with open(output, 'w') as fh:
+    gd_util.run_program(prog, args, stdout=fh)
+
+sys.exit(0)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/diversity_pi.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,73 @@
+<tool id="gd_diversity_pi" name="Diversity" version="1.0.0">
+  <description>: pi, allowing for unsequenced intervals</description>
+
+  <command interpreter="python">
+    #import json
+    #import base64
+    #import zlib
+    #set $snp_names = $input.dataset.metadata.individual_names
+    #set $snp_colms = $input.dataset.metadata.individual_columns
+    #set $snp_dict = dict(zip($snp_names, $snp_colms))
+    #set $snp_json = json.dumps($snp_dict, separators=(',',':'))
+    #set $snp_comp = zlib.compress($snp_json, 9)
+    #set $snp_arg = base64.b64encode($snp_comp)
+    #if $use_cov.choice == '1'
+      #set $cov_file = $use_cov.cov_input
+      #set $cov_ext = $use_cov.cov_input.ext
+      #set $cov_names = $use_cov.cov_input.dataset.metadata.individual_names
+      #set $cov_colms = $use_cov.cov_input.dataset.metadata.individual_columns
+      #set $cov_dict = dict(zip($cov_names, $cov_colms))
+      #set $cov_json = json.dumps($cov_dict, separators=(',',':'))
+      #set $cov_comp = zlib.compress($cov_json, 9)
+      #set $cov_arg = base64.b64encode($cov_comp)
+      #set $cov_min = $use_cov.min_coverage
+      #set $cov_req = $use_cov.req_thresh
+    #else
+      #set $cov_file = '/dev/null'
+      #set $cov_ext = ''
+      #set $cov_arg = ''
+      #set $cov_min = 0
+      #set $cov_req = 0
+    #end if
+    diversity_pi.py '$input' '$input.ext' '$snp_arg' '$cov_file' '$cov_ext' '$cov_arg' '$indiv_input' '$cov_min' '$cov_req' '$output'
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="gd_snp,gd_genotype" label="SNP/Genotype dataset" />
+    <conditional name="use_cov">
+      <param name="choice" type="select" format="integer" label="Include Coverage dataset">
+        <option value="1" selected="true">yes</option>
+        <option value="0">no</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <param name="cov_input" type="data" format="gd_snp,gd_genotype" label="Coverage dataset" />
+        <param name="min_coverage" type="integer" min="1" value="1" label="Minimum coverage" />
+        <param name="req_thresh" type="integer" min="1" value="1" label="Lower bound for shared well-covered bp" />
+      </when>
+    </conditional>
+    <param name="indiv_input" type="data" format="gd_indivs" label="Population Individuals" />
+  </inputs>
+
+  <outputs>
+    <data name="output" format="txt" metadata_source="input" />
+  </outputs>
+
+  <requirements>
+    <requirement type="package" version="0.1">gd_c_tools</requirement>
+  </requirements>
+
+  <help>
+**What it does**
+
+The user supplies the following:
+
+   1. A file in gd_genotype or gd_snp format giving the mitochondrial SNPs.
+   2. An optional gd_genotype file giving the sequence coverage for each individual at each mitochondrial position.
+   3. A set of individuals specified with the "Specify individuals" tool.
+   4. The minimum depth of sequence coverage. Positions where an individual has less coverage are ignored.
+   5. The number of adequately covered positions that must be shared by two individuals before their diversity is included in the reported average.
+
+For each pair of individuals (with adequate shared coverage), the program divides the number of nucleotide differences between the individuals in those intervals by the intervals' total length. Those ratios are averaged over the relevant pairs of individuals.
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/dpmix.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,166 @@
+#!/usr/bin/env python
+
+import gd_util
+import sys
+import os
+from Population import Population
+import gd_composite
+from dpmix_plot import make_dpmix_plot
+from LocationFile import LocationFile
+
+def load_and_check_pop(name, file, total_pop):  # load `file` as the population called `name`
+    population = Population(name=name)
+    population.from_population_file(file)
+    if not total_pop.is_superset(population):  # every member must also be in total_pop
+        gd_util.die('There is an individual in {0} that is not in the SNP table'.format(name))
+    return population
+
+def append_pop_tags(the_list, p, input_type, number):
+    """Append a 'column:number:name' string to the_list for each tag of p."""
+    is_genotype = (input_type == 'gd_genotype')
+    for column, name in (tag.split(':') for tag in p.tag_list()):
+        # gd_genotype columns are passed on reduced by 2
+        the_list.append('{0}:{1}:{2}'.format(int(column) - 2 if is_genotype else column, number, name))
+
+################################################################################
+
+# Exactly 21 arguments are expected after the script name.
+if len(sys.argv) != 22:
+    print >> sys.stderr, "usage"
+    sys.exit(1)
+
+input, input_type, data_source, switch_penalty, ap1_input, ap1_name, ap2_input, ap2_name, ap3_input, ap3_name, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir, heterochromatin_loc_file, ind_arg, het_arg, add_logs = sys.argv[1:]
+
+# '/dev/null' in place of source population 1 means the reference is used instead
+use_reference = (ap1_input == '/dev/null')
+
+# '/dev/null' in place of source population 3 means only two populations are used
+populations = 2 if ap3_input == '/dev/null' else 3
+
+chrom = 'all'
+
+# Resolve the heterochromatin file: an installed one looked up by dbkey,
+# none at all ('/dev/null'), or the path that was passed in het_arg.
+if het_arg == 'use_installed':
+    loc_path = os.path.join(galaxy_data_index_dir, heterochromatin_loc_file)
+    location_file = LocationFile(loc_path)
+    heterochrom_path = location_file.get_values_if_exists(dbkey)
+    if heterochrom_path is None:
+        heterochrom_path = '/dev/null'
+elif het_arg == 'use_none':
+    heterochrom_path = '/dev/null'
+else:
+    heterochrom_path = het_arg
+
+population_list = []
+
+# individuals of the input table; each population below is checked against it
+p_total = Population()
+p_total.from_wrapped_dict(ind_arg)
+
+if not use_reference:
+    ap1 = load_and_check_pop('Ancestral population 1', ap1_input, p_total)
+    population_list.append(ap1)
+
+ap2 = load_and_check_pop('Ancestral population 2', ap2_input, p_total)
+population_list.append(ap2)
+
+if populations == 3:
+    ap3 = load_and_check_pop('Ancestral population 3', ap3_input, p_total)
+    population_list.append(ap3)
+
+p = load_and_check_pop('Potentially admixed', p_input, p_total)
+population_list.append(p)
+
+gd_util.mkdir_p(output2_dir)
+
+################################################################################
+# Create tabular file
+################################################################################
+
+misc_file = os.path.join(output2_dir, 'summary.txt')
+
+prog = 'dpmix'
+# fixed leading arguments; the population tags are appended after them
+args = [
+    prog,
+    input,
+    ref_column,
+    chrom,
+    data_source,
+    add_logs,
+    switch_penalty,
+    heterochrom_path,
+    misc_file,
+]
+
+# source populations are tagged 1..3, the potentially admixed individuals 0
+if use_reference:
+    args.append('0:1:reference')
+else:
+    append_pop_tags(args, ap1, input_type, 1)
+append_pop_tags(args, ap2, input_type, 2)
+if populations == 3:
+    append_pop_tags(args, ap3, input_type, 3)
+append_pop_tags(args, p, input_type, 0)
+
+with open(output, 'w') as fh:
+    gd_util.run_program(prog, args, stdout=fh)
+
+################################################################################
+# Create pdf file
+################################################################################
+
+# Labels shown in the plot legend, keyed by state number: state 0 is
+# heterochromatin, states 1 and 2 are the first two source populations.
+state2name = {
+    0:'heterochromatin',
+    1:ap1_name,
+    2:ap2_name
+}
+
+# a label for state 3 is only added when a third source population is in use
+if populations == 3:
+    state2name[3] = ap3_name
+
+pdf_file = os.path.join(output2_dir, 'picture.pdf')
+make_dpmix_plot(
+    dbkey, output, pdf_file, galaxy_data_index_dir, state2name=state2name, populations=populations
+)
+
+################################################################################
+# Create html
+################################################################################
+
+info_page = gd_composite.InfoPage()
+info_page.set_title('dpmix Galaxy Composite Dataset')
+
+display_file = gd_composite.DisplayFile()
+display_value = gd_composite.DisplayValue()
+
+out_pdf = gd_composite.Parameter(name='picture.pdf', value='picture.pdf', display_type=display_file)
+out_misc = gd_composite.Parameter(name='summary.txt', value='summary.txt', display_type=display_file)
+
+info_page.add_output_parameter(out_pdf)
+info_page.add_output_parameter(out_misc)
+
+# An unrecognised code is shown as-is instead of leaving the name unbound.
+data_source_value = {
+    '0': 'sequence coverage',
+    '1': 'estimated genotype'
+}.get(data_source, data_source)
+in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value)
+in_switch_penalty = gd_composite.Parameter(description='Switch penalty', value=switch_penalty, display_type=display_value)
+
+info_page.add_input_parameter(in_data_source)
+info_page.add_input_parameter(in_switch_penalty)
+
+misc_populations =  gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList())
+
+info_page.add_misc(misc_populations)
+
+with open(output2, 'w') as ofh:
+    print >> ofh, info_page.render()
+
+sys.exit(0)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/dpmix.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,194 @@
+<tool id="gd_dpmix" name="Admixture" version="1.1.0">
+  <description>: Map genomic intervals resembling specified source populations</description>
+
+  <command interpreter="python">
+    #import json
+    #import base64
+    #import zlib
+    #set $ind_names = $input.dataset.metadata.individual_names
+    #set $ind_colms = $input.dataset.metadata.individual_columns
+    #set $ind_dict = dict(zip($ind_names, $ind_colms))
+    #set $ind_json = json.dumps($ind_dict, separators=(',',':'))
+    #set $ind_comp = zlib.compress($ind_json, 9)
+    #set $ind_arg = base64.b64encode($ind_comp)
+    dpmix.py '$input'
+    #if $input_type.choice == '0'
+      'gd_snp' '$input_type.data_source'
+    #else if $input_type.choice == '1'
+      'gd_genotype' '1'
+    #end if
+    #if $third_pop.choice == '0'
+      #set $ap3_arg = '/dev/null'
+      #set $ap3_name_arg = ''
+    #else if $third_pop.choice == '1'
+      #set $ap3_arg = $third_pop.ap3_input
+      #set $ap3_name_arg = $third_pop.ap3_input.name
+    #end if
+    #if $user_het.choice == '0'
+      #set $het_arg = 'use_installed'
+    #else if $user_het.choice == '1'
+      #set $het_arg = $user_het.het_file
+    #else if $user_het.choice == '2'
+      #set $het_arg = 'use_none'
+    #end if
+    '$switch_penalty'
+    #if $use_reference.choice == '0'
+      '$ap1_input' '$ap1_input.name'
+    #else if $use_reference.choice == '1'
+      '/dev/null' 'reference'
+    #end if
+    '$ap2_input' '$ap2_input.name' '$ap3_arg' '$ap3_name_arg' '$p_input' '$output' '$output2' '$output2.files_path' '$input.dataset.metadata.dbkey' '$input.dataset.metadata.ref' '$GALAXY_DATA_INDEX_DIR' 'gd.heterochromatic.loc' '$ind_arg' '$het_arg' '$add_logs'
+  </command>
+
+  <inputs>
+    <conditional name="input_type">
+      <param name="choice" type="select" format="integer" label="Input format">
+        <option value="0" selected="true">gd_snp</option>
+        <option value="1">gd_genotype</option>
+      </param>
+      <when value="0">
+        <param name="input" type="data" format="gd_snp" label="SNP dataset">
+          <validator type="unspecified_build" message="This dataset does not have a reference species and cannot be used with this tool" />
+        </param>
+
+        <param name="data_source" type="select" format="integer" label="Similarity metric">
+          <option value="0">sequence coverage</option>
+          <option value="1" selected="true">estimated genotype</option>
+        </param>
+      </when>
+      <when value="1">
+        <param name="input" type="data" format="gd_genotype" label="Genotype dataset">
+          <validator type="unspecified_build" message="This dataset does not have a reference species and cannot be used with this tool" />
+        </param>
+      </when>
+    </conditional>
+
+    <conditional name="use_reference">
+      <param name="choice" type="select" format="integer" label="History item or Reference sequence">
+        <option value="0" selected="true">History item</option>
+        <option value="1">Reference sequence</option>
+      </param>
+      <when value="0">
+        <param name="ap1_input" type="data" format="gd_indivs" label="Source population 1 individuals" />
+      </when>
+      <when value="1" />
+    </conditional>
+
+    <param name="ap2_input" type="data" format="gd_indivs" label="Source population 2 individuals" />
+
+    <conditional name="third_pop">
+      <param name="choice" type="select" format="integer" label="Include third source population">
+        <option value="0" selected="true">no</option>
+        <option value="1">yes</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <param name="ap3_input" type="data" format="gd_indivs" label="Source population 3 individuals" />
+      </when>
+    </conditional>
+
+    <param name="p_input" type="data" format="gd_indivs" label="Potentially admixed individuals" />
+
+    <param name="switch_penalty" type="float" min="0" value="10" label="Genotype switch penalty" help="Note:  The best choice for the Genotype switch penalty depends on the density of SNPs and the age of the admixture events.  With 50,000 SNPs in a vertebrate genome, 10.0 might be appropriate, with millions of SNPs, 100.0 might work better.  We recommend experimenting with various thresholds on minimal spacing between SNVs (to increase independence), minimal FST between the source populations (to identify &quot;ancestry informative markers&quot;), and Genotype switch penalty, to reach conclusions that are robust to changes in analysis parameters."/>
+
+    <conditional name="user_het">
+      <param name="choice" type="select" format="integer" label="Heterochromatin info">
+        <option value="0" selected="true">use installed</option>
+        <option value="1">use your own</option>
+        <option value="2">use none</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <param name="het_file" type="data" format="txt" label="Heterochromatin dataset" />
+      </when>
+    </conditional>
+
+    <param name="add_logs" type="select" format="integer" label="Probabilities">
+      <option value="1" selected="true">add logs of probabilities</option>
+      <option value="0">add probabilities</option>
+    </param>
+
+  </inputs>
+
+  <outputs>
+    <data name="output" format="tabular" />
+    <data name="output2" format="html" />
+  </outputs>
+
+  <requirements>
+    <requirement type="package" version="0.1">gd_c_tools</requirement>
+    <requirement type="package" version="1.2.1">matplotlib</requirement>
+  </requirements>
+
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
+      <param name="ap1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" />
+      <param name="ap2_input" value="test_in/b.gd_indivs" ftype="gd_indivs" />
+      <param name="p_input" value="test_in/c.gd_indivs" ftype="gd_indivs" />
+      <param name="data_source" value="0" />
+      <param name="switch_penalty" value="10" />
+
+      <output name="output" file="test_out/dpmix/dpmix.tabular" />
+
+      <output name="output2" file="test_out/dpmix/dpmix.html" ftype="html" compare="diff" lines_diff="2">
+        <extra_files type="file" name="picture.pdf" value="test_out/dpmix/dpmix.pdf" compare="sim_size" delta = "10000" />
+        <extra_files type="file" name="summary.txt" value="test_out/dpmix/misc.txt" />
+      </output>
+    </test>
+  </tests>
+
+  <help>
+
+**Dataset formats**
+
+The input datasets are in gd_snp_, gd_genotype_, and gd_indivs_ formats.  It is important for
+the Individuals datasets to have unique names and for there to be no overlap
+between the two populations.  Rename these datasets if
+needed to make them unique.
+There are two output datasets, one tabular_ and one composite. (`Dataset missing?`_)
+
+.. _gd_snp: ./static/formatHelp.html#gd_snp
+.. _gd_genotype: ./static/formatHelp.html#gd_genotype
+.. _gd_indivs: ./static/formatHelp.html#gd_indivs
+.. _tabular: ./static/formatHelp.html#tab
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+The user specifies two or three source populations (i.e., sources
+for chromosomes) and a set of potentially admixed individuals, and
+chooses between the sequence coverage or the estimated genotypes to
+measure the similarity of genomic intervals in admixed individuals to
+the three classes of source chromosomes.  The user also specifies a
+"switch penalty", controlling the strength of evidence needed to switch
+between source populations as the program scans along a chromosome.
+Choice of an appropriate value depends on the number of SNPs and, to
+a lesser extent, on the time since the admixture events.  With several
+million SNPs genome-wide, reasonable values might fall between 10
+and 100.  If there are 3 source populations, then for each potentially
+admixed individual the program divides the genome into six "genotypes":
+
+1. homozygous for the first source population (i.e., both chromosomes from that population),
+2. homozygous for the second source population,
+3. homozygous for the third source population,
+4. heterozygous for the first and second populations (i.e., one chromosome from each),
+5. heterozygous for the first and third populations, or
+6. heterozygous for the second and third populations.
+
+Parts of a reference chromosome that are labeled as "heterochromatic"
+are given the "non-genotype" 0.  With two source populations, only
+"genotypes" 1, 2 and 3 are possible, where 3 now means heterozygous in
+the two source populations.
+
+There are two output datasets generated.  A tabular dataset with chromosome,
+start, stop, and pairs of columns containing the "genotypes" from above
+and label from the admixed individual.  The second dataset is a composite
+dataset with general information from the run and a link to a pdf which
+graphically shows the source population along each of the chromosomes.
+The second link is to a text file with summary information of the
+"genotypes" over the whole genome.
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/dpmix_plot.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,456 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import math
+
+import matplotlib as mpl
+mpl.use('PDF')
+from matplotlib.backends.backend_pdf import PdfPages
+import matplotlib.pyplot as plt
+from matplotlib.path import Path
+import matplotlib.patches as patches
+
+################################################################################
+
+def build_chrom_len_dict(dbkey, galaxy_data_index_dir):
+    """Return {chrom: length} read from <data dir>/shared/ucsc/chrom/<dbkey>.len.
+
+    Only lines with exactly two whitespace-separated fields are used.  Best
+    effort: an unreadable file or a non-integer length ends the read and the
+    entries collected so far (possibly none) are returned.
+    """
+    chrom_len_file = '{0}.len'.format(dbkey)
+    chrom_len_path = os.path.join(galaxy_data_index_dir, 'shared/ucsc/chrom', chrom_len_file)
+    chrom_len = {}
+    try:
+        with open(chrom_len_path) as fh:
+            for line in fh:
+                elems = line.split()
+                if len(elems) == 2:
+                    chrom_len[elems[0]] = int(elems[1])
+    except (IOError, ValueError):
+        # swallow only the expected failures, not e.g. KeyboardInterrupt
+        pass
+    return chrom_len
+
+def parse_input_file(input_file):
+    """Parse the whitespace-separated interval file used for plotting.
+
+    Each non-blank line holds: chromosome, begin, end, state (three
+    integers) and an individual id.
+
+    Returns (chroms, individuals, data, chrom_len, used_states) where the
+    three lists keep first-seen order, data[chrom][id] is a list of
+    (begin, end, state) tuples and chrom_len[chrom] is the largest end seen.
+    """
+    chroms, individuals, used_states = [], [], []
+    data, chrom_len = {}, {}
+
+    def remember(seen, value):
+        # append value to seen only on its first appearance
+        if value not in seen:
+            seen.append(value)
+
+    with open(input_file) as fh:
+        for raw in fh:
+            fields = raw.split()
+            if not fields:
+                continue
+            chrom = fields[0]
+            begin, end, state = map(int, fields[1:4])
+            ident = fields[4]
+            remember(used_states, state)
+            remember(chroms, chrom)
+            remember(individuals, ident)
+            data.setdefault(chrom, {}).setdefault(ident, []).append((begin, end, state))
+            chrom_len[chrom] = max(end, chrom_len.get(chrom, 0))
+
+    return chroms, individuals, data, chrom_len, used_states
+
+def check_chroms(chroms, chrom_len, dbkey):
+    """Report every chrom without an entry in chrom_len on stderr; exit(1) if there is any."""
+    missing = [chrom for chrom in chroms if chrom not in chrom_len]
+    # list all of the problems before giving up, not just the first one
+    for chrom in missing:
+        print >> sys.stderr, "Can't find length for {0} chromosome {1}".format(dbkey, chrom)
+    if missing:
+        sys.exit(1)
+
+def check_data(data, chrom_len, dbkey):
+    """Report intervals with begin >= end or outside [0, chrom length] on stderr; exit(1) if any."""
+    error = 0
+    for chrom in data:
+        chrom_beg, chrom_end = 0, chrom_len[chrom]
+        for individual in data[chrom]:
+            for p1, p2, state in data[chrom][individual]:
+                if p1 >= p2:
+                    print >> sys.stderr, "Bad data line: begin >= end: {0} {1} {2} {3} {4}".format(chrom, p1, p2, state, individual)
+                    error = 1
+                if p1 < chrom_beg or p2 > chrom_end:
+                    print >> sys.stderr, "Bad data line: outside {0} boundaries[{1} - {2}]: {3} {4} {5} {6} {7}".format(dbkey, chrom_beg, chrom_end, chrom, p1, p2, state, individual)
+                    error = 1
+    if error:
+        sys.exit(1)
+
+def make_rectangle(p1, p2, color, bottom=0.0, top=1.0):
+    """Return a PathPatch for the axis-aligned rectangle [p1, p2] x [bottom, top].
+
+    The patch is filled with color and has a line width of 0.
+    """
+    corners = [
+        (p1, bottom),   # lower left
+        (p1, top),      # upper left
+        (p2, top),      # upper right
+        (p2, bottom),   # lower right
+    ]
+
+    # Move to the first corner, draw lines to the other three, then close
+    # the polygon.  CLOSEPOLY still needs a vertex, but its value is ignored.
+    verts = corners + [(0.0, 0.0)]
+    codes = [Path.MOVETO] + [Path.LINETO] * 3 + [Path.CLOSEPOLY]
+
+    outline = Path(verts, codes)
+    return patches.PathPatch(outline, facecolor=color, lw=0)
+
+def make_split_rectangle(p1, p2, top_color, bottom_color):
+    """Return [lower-half patch in bottom_color, upper-half patch in top_color] for [p1, p2]."""
+    halves = ((bottom_color, {'top': 0.5}), (top_color, {'bottom': 0.5}))
+    return [make_rectangle(p1, p2, color, **span) for color, span in halves]
+
+def make_state_rectangle_2pop(p1, p2, state, chrom, individual):
+    """Return the list of patches that draw `state` over [p1, p2] with two source populations.
+
+    State 0 is heterochromatin (grey), 1 and 2 are the solid population
+    colours and 3 is a split bar of both.  Any other state is reported on
+    stderr, with chrom and individual, and the program exits with status 1.
+    """
+    p1_color, p2_color, heterochromatin_color = 'r', 'g', '#c7c7c7'
+    solid = {0: heterochromatin_color, 1: p1_color, 2: p2_color}
+    if state in solid:
+        return [ make_rectangle(p1, p2, solid[state]) ]
+    if state == 3:
+        return make_split_rectangle(p1, p2, p1_color, p2_color)
+    # include the individual so the offending row can be identified
+    print >> sys.stderr, "Unknown state: {0}: {1} {2} {3} {4} {5}".format(state, chrom, p1, p2, state, individual)
+    sys.exit(1)
+
+def make_state_rectangle_3pop(p1, p2, state, chrom, individual):
+    """Return the list of patches that draw `state` over [p1, p2] with three source populations.
+
+    State 0 is heterochromatin (grey) and states 1-3 are the solid colours of
+    the three populations.  States 4, 5 and 6 are split bars for the pairs
+    (1, 2), (1, 3) and (2, 3).  Any other state is reported on stderr,
+    together with chrom and individual, and the program exits with status 1.
+    """
+    p1_color, p2_color, p3_color = 'r', 'g', 'b'
+    heterochromatin_color = '#c7c7c7'
+
+    solid = {0: heterochromatin_color, 1: p1_color, 2: p2_color, 3: p3_color}
+    split = {4: (p1_color, p2_color), 5: (p1_color, p3_color), 6: (p2_color, p3_color)}
+
+    if state in solid:
+        return [ make_rectangle(p1, p2, solid[state]) ]
+    if state in split:
+        top_color, bottom_color = split[state]
+        return make_split_rectangle(p1, p2, top_color, bottom_color)
+
+    # include the individual so the offending row can be identified
+    print >> sys.stderr, "Unknown state: {0}: {1} {2} {3} {4} {5}".format(state, chrom, p1, p2, state, individual)
+    sys.exit(1)
+
+def nicenum(num, round=False):
+    """Return 1, 2, 5 or 10 times the power of ten at or just below num.
+
+    With round=True the leading fraction is mapped using the cut-offs
+    1.5, 3 and 7; otherwise the smallest of 1, 2, 5, 10 that is >= the
+    fraction is used.  Zero is returned as 0.0.
+    """
+    if num == 0:
+        return 0.0
+
+    exp = int(math.floor(math.log10(num)))
+    fraction = num / math.pow(10, exp)
+
+    if round:
+        candidates = ((1.5, 1.0), (3.0, 2.0), (7.0, 5.0))
+        fits = lambda limit: fraction < limit
+    else:
+        candidates = ((1.0, 1.0), (2.0, 2.0), (5.0, 5.0))
+        fits = lambda limit: fraction <= limit
+
+    nice = 10.0
+    for limit, value in candidates:
+        if fits(limit):
+            nice = value
+            break
+
+    return nice * pow(10, exp)
+
+def tick_foo(beg, end, loose=False):
+    """Compute x-axis tick positions and labels for the interval [beg, end].
+
+    The tick step d is a nicenum() value derived from the interval width
+    and ntick = 10.  With loose=True the tick range is widened outwards to
+    multiples of d; otherwise ticks run from beg up to at most end.
+
+    Returns (vals, labels): vals are the integer tick positions with the
+    first one dropped, and labels are those positions divided by
+    10**digits, where digits = floor(log10(d)), formatted as strings.
+    """
+    ntick = 10
+
+    span = nicenum(end - beg, round=False)
+    d = nicenum(span/(ntick - 1), round=True)
+    digits = int(math.floor(math.log10(d)))
+
+    if loose:
+        graph_min = math.floor(beg/d) * d
+        graph_max = math.ceil(end/d) * d
+        # half a step of slack past graph_max (presumably for float drift)
+        stop = graph_max + (0.5 * d)
+    else:
+        graph_min = beg
+        stop = end
+
+    vals = []
+    x = graph_min
+    while x <= stop:
+        vals.append(int(x))
+        x += d
+
+    # drop the first tick, i.e. the one sitting at graph_min
+    vals = vals[1:]
+
+    scale = math.pow(10, digits)
+    labels = ['{0}'.format(int(val/scale)) for val in vals]
+
+    return vals, labels
+
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+
+def space_for_legend(plot_params):
+    """Return the vertical space needed by the legend (0.0 when there is none).
+
+    Each legend state takes ind_height, with ind_space between consecutive rows.
+    """
+    legend_states = plot_params['legend_states']
+    if not legend_states:
+        return 0.0
+    gap, row = plot_params['ind_space'], plot_params['ind_height']
+    return 0.0 + (len(legend_states) * (gap + row) - gap)
+
+################################################################################
+
+def space_for_chroms(plot_params, chroms, individuals, data):
+    """Return {chrom: vertical space needed to draw that chromosome}.
+
+    The space is chrom_height plus (ind_space + ind_height) for every
+    entry of individuals that has data on the chromosome.
+    """
+    chrom_height = plot_params['chrom_height']
+    ind_space = plot_params['ind_space']
+    ind_height = plot_params['ind_height']
+    per_individual = ind_space + ind_height
+
+    needed = {}
+    for chrom in chroms:
+        present = data[chrom]
+        rows = sum(1 for individual in individuals if individual in present)
+        needed[chrom] = chrom_height + rows * per_individual
+
+    return needed
+
+################################################################################
+
+def make_dpmix_plot(input_dbkey, input_file, output_file, galaxy_data_index_dir, state2name=None, populations=3):
+    """Draw the intervals listed in input_file as a PDF (output_file), one row per individual.
+
+    A chromosome's length is taken from the installed <dbkey>.len file when it
+    is listed there, otherwise from the largest interval end seen in the data.
+    state2name maps state numbers to legend labels; populations (2 or 3) picks
+    the function that turns a state number into coloured rectangles.
+    """
+    fs_chrom_len = build_chrom_len_dict(input_dbkey, galaxy_data_index_dir)
+    chroms, individuals, data, chrom_len, used_states = parse_input_file(input_file)
+    ## populate chrom_len
+    for chrom in chrom_len.keys():
+        if chrom in fs_chrom_len:
+            chrom_len[chrom] = fs_chrom_len[chrom]
+
+    #check_chroms(chroms, chrom_len, input_dbkey)
+    check_data(data, chrom_len, input_dbkey)
+
+    ## plot parameters
+    plot_params = {
+        'plot_dpi':        300,
+        'page_width':     8.50,
+        'page_height':   11.00,
+        'top_margin':     0.10,
+        'bottom_margin':  0.10,
+        'chrom_space':    0.25,
+        'chrom_height':   0.25,
+        'ind_space':      0.10,
+        'ind_height':     0.25,
+        'legend_space':   0.10
+    }
+
+    ## in the legend, only print out states that are
+    ##   1) in the data
+    ##    - AND -
+    ##   2) in the state2name map
+    legend_states = []
+    if state2name is not None:
+        for state in used_states:
+            if state in state2name:
+                legend_states.append(state)
+    plot_params['legend_states'] = legend_states
+
+    ## choose the correct make_state_rectangle method
+    if populations == 3:
+        plot_params['rectangle_method'] = make_state_rectangle_3pop
+    elif populations == 2:
+        plot_params['rectangle_method'] = make_state_rectangle_2pop
+
+    pdf_pages = PdfPages(output_file)
+
+    ## generate a list of chroms for each page
+    needed_for_legend = space_for_legend(plot_params)
+    needed_for_chroms = space_for_chroms(plot_params, chroms, individuals, data)
+
+    chrom_space_per_page = plot_params['page_height']
+    chrom_space_per_page -= plot_params['top_margin'] + plot_params['bottom_margin']
+    chrom_space_per_page -= needed_for_legend + plot_params['legend_space']
+    chrom_space_per_page -= plot_params['chrom_space']
+
+    chroms_left = chroms[:]
+    pages = []
+
+    space_left = chrom_space_per_page
+    chrom_list = []
+
+    while chroms_left:
+        chrom = chroms_left.pop(0)
+        space_needed = needed_for_chroms[chrom] + plot_params['chrom_space']
+        if (space_needed > chrom_space_per_page):
+            print >> sys.stderr, 'Multipage chroms not yet supported'
+            sys.exit(1)
+        ## sometimes 1.9 - 1.9 < 0 (-4.4408920985e-16)
+        ## so, we make sure it's not more than a millimeter over
+        if space_left - space_needed > -0.04:
+            chrom_list.append(chrom)
+            space_left -= space_needed
+        else:
+            pages.append(chrom_list[:])
+            chrom_list = []
+            chroms_left.insert(0, chrom)
+            space_left = chrom_space_per_page
+
+    ## the loop only flushes a page when a chrom does not fit; add the final page here
+    if chrom_list:
+        pages.append(chrom_list)
+
+    ############################################################################
+
+    plot_dpi = plot_params['plot_dpi']
+    page_width = plot_params['page_width']
+    page_height = plot_params['page_height']
+    top_margin = plot_params['top_margin']
+    ind_space = plot_params['ind_space']
+    ind_height = plot_params['ind_height']
+    make_state_rectangle = plot_params['rectangle_method']
+    legend_space = plot_params['legend_space']
+    chrom_space = plot_params['chrom_space']
+    chrom_height = plot_params['chrom_height']
+
+    for page in pages:
+        fig = plt.figure(figsize=(page_width, page_height), dpi=plot_dpi)
+        bottom = 1.0 - (top_margin/page_height)
+
+        # print legend
+        if legend_states:
+            top = True
+            for state in sorted(legend_states):
+                if top:
+                    bottom -= ind_height/page_height
+                    top = False
+                else:
+                    bottom -= (ind_space + ind_height)/page_height
+
+                ax1 = fig.add_axes([0.0, bottom, 0.09, ind_height/page_height])
+                plt.axis('off')
+                ax1.set_xlim(0, 1)
+                ax1.set_ylim(0, 1)
+                for patch in make_state_rectangle(0, 1, state, 'legend', state2name[state]):
+                    ax1.add_patch(patch)
+
+                ax2 = fig.add_axes([0.10, bottom, 0.88, ind_height/page_height], frame_on=False)
+                plt.axis('off')
+                plt.text(0.0, 0.5, state2name[state], fontsize=10, ha='left', va='center')
+
+            bottom -= legend_space/page_height
+
+        # print chroms
+        top = True
+        for chrom in page:
+            length = chrom_len[chrom]
+            vals, labels = tick_foo(0, length)
+
+            if top:
+                bottom -= chrom_height/page_height
+                top = False
+            else:
+                bottom -= (chrom_space + chrom_height)/page_height
+
+            ax = fig.add_axes([0.0, bottom, 1.0, chrom_height/page_height])
+            plt.axis('off')
+            plt.text(0.5, 0.5, chrom, fontsize=14, ha='center')
+
+            individual_count = sum(1 for individual in individuals if individual in data[chrom])
+            i = 0
+            for individual in individuals:
+                if individual in data[chrom]:
+                    i += 1
+                    bottom -= (ind_space + ind_height)/page_height
+
+                    ax1 = fig.add_axes([0.0, bottom, 0.09, ind_height/page_height])
+                    plt.axis('off')
+                    plt.text(1.0, 0.5, individual, fontsize=10, ha='right', va='center')
+
+                    ax2 = fig.add_axes([0.10, bottom, 0.88, ind_height/page_height], frame_on=False)
+                    ax2.set_xlim(0, length)
+                    ax2.set_ylim(0, 1)
+                    if i != individual_count:
+                        plt.axis('off')
+                    else:
+                        ax2.tick_params(top=False, left=False, right=False, labelleft=False)
+                        ax2.set_xticks(vals)
+                        ax2.set_xticklabels(labels)
+
+                    for p1, p2, state in sorted(data[chrom][individual]):
+                        for patch in make_state_rectangle(p1, p2, state, chrom, individual):
+                            ax2.add_patch(patch)
+
+                    # extend last state to end of chrom
+                    if p2 < length:
+                        for patch in make_state_rectangle(p2, length, state, chrom, individual):
+                            ax2.add_patch(patch)
+
+        pdf_pages.savefig(fig)
+        plt.close(fig)
+
+    pdf_pages.close()
+
+################################################################################
+
+if __name__ == '__main__':
+    make_dpmix_plot('loxAfr3', 'output.dat', 'output2_files/picture.pdf', '/scratch/galaxy/home/oocyte/galaxy_oocyte/tool-data', state2name={0: 'heterochromatin', 1: 'reference', 2: 'asian'}, populations=2)
+#    NOTE(review): the call above is a developer smoke test with hard-coded local paths; a command-line form was sketched as:
+#    input_dbkey, input_file, output_file, galaxy_data_index_dir = sys.argv[1:5]; make_dpmix_plot(input_dbkey, input_file, output_file, galaxy_data_index_dir)
+    sys.exit(0)
+
+## notes
+# 1) pass in a state to name mapping
+# 2) only print out names for states which exist in the data, and are in the state to name mapping
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/draw_variants.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,113 @@
+#!/usr/bin/env python
+
+import gd_util
+import sys
+from Population import Population
+
+################################################################################
+
+def load_pop(file, wrapped_dict):
+    if file == '/dev/null':
+        pop = None
+    else:
+        pop = Population()
+        pop.from_wrapped_dict(wrapped_dict)
+    return pop
+
+def append_tags(the_list, p, p_type, val):
+    if p is None:
+        return
+    for tag in p.tag_list():
+        column, name = tag.split(':')
+        if p_type == 'gd_genotype':
+            column = int(column) - 2
+        the_list.append('{0}:{1}:{2}'.format(val, column, name))
+
+################################################################################
+
+if len(sys.argv) != 11:
+    gd_util.die('Usage')
+
+
+snp_file, snp_ext, snp_arg, indiv_input, annotation_input, cov_file, cov_ext, cov_arg, min_coverage, output = sys.argv[1:]
+
+p_snp = load_pop(snp_file, snp_arg)
+p_cov = load_pop(cov_file, cov_arg)
+
+if indiv_input == '/dev/null':
+    if p_snp is not None:
+        p_ind = p_snp
+    elif p_cov is not None:
+        p_ind = p_cov
+    else:
+        p_ind = None
+    order_p_ind = True
+else:
+    p_ind = Population()
+    p_ind.from_population_file(indiv_input)
+    order_p_ind = False
+
+## p ind must be from either p_snp or p_cov
+if p_snp is not None and p_cov is not None:
+    if not (p_snp.is_superset(p_ind) or p_cov.is_superset(p_ind)):
+        gd_util.die('There is an individual in the population individuals that is not in the SNP/Genotype or Coverage table')
+elif p_snp is not None:
+    if not p_snp.is_superset(p_ind):
+        gd_util.die('There is an individual in the population individuals that is not in the SNP/Genotype table')
+elif p_cov is not None:
+    if not p_cov.is_superset(p_ind):
+        gd_util.die('There is an individual in the population individuals that is not in the Coverage table')
+
+
+################################################################################
+
+prog = 'mito_draw'
+
+args = [ prog ]
+args.append(snp_file)
+args.append(cov_file)
+args.append(annotation_input)
+args.append(min_coverage)
+
+if order_p_ind:
+    for column in sorted(p_ind.column_list()):
+        individual = p_ind.individual_with_column(column)
+        name = individual.name.split()[0]
+        args.append('{0}:{1}:{2}'.format(0, column, name))
+else:
+    append_tags(args, p_ind, 'gd_indivs', 0)
+
+append_tags(args, p_snp, snp_ext, 1)
+append_tags(args, p_cov, cov_ext, 2)
+
+with open('Ji.spec', 'w') as fh:
+    gd_util.run_program(prog, args, stdout=fh)
+
+################################################################################
+
+prog = 'varplot'
+
+args = [ prog ]
+args.append('-w')
+args.append(3)
+args.append('-s')
+args.append(0.3)
+args.append('-g')
+args.append(0.2)
+args.append('Ji.spec')
+
+with open('Ji.svg', 'w') as fh:
+    gd_util.run_program(prog, args, stdout=fh)
+
+################################################################################
+
+prog = 'convert'
+
+args = [ prog ]
+args.append('-density')
+args.append(100)
+args.append('Ji.svg')
+args.append('tiff:{0}'.format(output))
+
+gd_util.run_program(prog, args)
+sys.exit(0)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/draw_variants.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,102 @@
+<tool id="gd_draw_variants" name="Draw variants" version="1.0.0">
+  <description>: show positions of SNVs and unsequenced intervals</description>
+
+  <command interpreter="python">
+    #import json
+    #import base64
+    #import zlib
+    #if $use_snp.choice == '1'
+      #set $snp_file = $use_snp.snp_input
+      #set $snp_ext = $use_snp.snp_input.ext
+      #set $snp_names = $use_snp.snp_input.dataset.metadata.individual_names
+      #set $snp_colms = $use_snp.snp_input.dataset.metadata.individual_columns
+      #set $snp_dict = dict(zip($snp_names, $snp_colms))
+      #set $snp_json = json.dumps($snp_dict, separators=(',',':'))
+      #set $snp_comp = zlib.compress($snp_json, 9)
+      #set $snp_arg = base64.b64encode($snp_comp)
+    #else
+      #set $snp_file = '/dev/null'
+      #set $snp_ext = ''
+      #set $snp_arg = ''
+    #end if
+    #if $use_cov.choice == '1'
+      #set $cov_file = $use_cov.cov_input
+      #set $cov_ext = $use_cov.cov_input.ext
+      #set $cov_names = $use_cov.cov_input.dataset.metadata.individual_names
+      #set $cov_colms = $use_cov.cov_input.dataset.metadata.individual_columns
+      #set $cov_dict = dict(zip($cov_names, $cov_colms))
+      #set $cov_json = json.dumps($cov_dict, separators=(',',':'))
+      #set $cov_comp = zlib.compress($cov_json, 9)
+      #set $cov_arg = base64.b64encode($cov_comp)
+      #set $cov_min = $use_cov.min_coverage
+    #else
+      #set $cov_file = '/dev/null'
+      #set $cov_ext = ''
+      #set $cov_arg = ''
+      #set $cov_min = 0
+    #end if
+    #if $use_indiv.choice == '1'
+      #set $ind_arg = $use_indiv.indiv_input
+    #else
+      #set $ind_arg = '/dev/null'
+    #end if
+    draw_variants.py '$snp_file' '$snp_ext' '$snp_arg' '$ind_arg' '$annotation_input' '$cov_file' '$cov_ext' '$cov_arg' '$cov_min' '$output'
+  </command>
+
+  <inputs>
+    <conditional name="use_snp">
+      <param name="choice" type="select" format="integer" label="Include SNP/Genotype dataset">
+        <option value="1" selected="true">yes</option>
+        <option value="0">no</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <param name="snp_input" type="data" format="gd_snp,gd_genotype" label="SNP/Genotype dataset" />
+      </when>
+    </conditional>
+    <conditional name="use_cov">
+      <param name="choice" type="select" format="integer" label="Include Coverage dataset">
+        <option value="1" selected="true">yes</option>
+        <option value="0">no</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <param name="cov_input" type="data" format="gd_snp,gd_genotype" label="Coverage dataset" />
+        <param name="min_coverage" type="integer" min="1" value="1" label="Minimum coverage" />
+      </when>
+    </conditional>
+    <conditional name="use_indiv">
+      <param name="choice" type="select" label="Compute for">
+        <option value="0" selected="true">All individuals</option>
+        <option value="1">Individuals in a population</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <param name="indiv_input" type="data" format="gd_indivs" label="Population Individuals" />
+      </when>
+    </conditional>
+    <param name="annotation_input" type="data" format="interval" label="Annotation dataset" />
+  </inputs>
+
+  <outputs>
+    <data name="output" format="tiff" />
+  </outputs>
+
+  <requirements>
+    <requirement type="package" version="0.1">gd_c_tools</requirement>
+  </requirements>
+
+  <help>
+**What it does**
+
+The user supplies the following:
+
+   1. An optional file in gd_genotype or gd_snp format giving the mitochondrial SNPs.
+   2. An optional gd_genotype file giving the sequence coverage for each individual at each mitochondrial position.
+   3. The minimum depth of sequence coverage. Positions where an individual has less coverage are ignored.
+   4. A set of individuals specified with the "Specify individuals" tool.
+   5. A file of annotation for the reference mitochondrial sequence.
+
+The program draws a picture indicating the locations of SNPs and the inadequately covered intervals.
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/echo.bash	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+
+# Append every individual given on the command line to the output file, one
+# per line.
+#
+# Arguments: input output individual [individual ...]
+# The first argument is accepted for interface compatibility but is not read.
+
+if [ $# -lt 3 ]; then
+    echo "usage: $0 input output individual [individual ...]" >&2
+    exit 1
+fi
+
+output="$2"
+shift 2
+
+# printf writes each argument verbatim; echo would interpret values such as
+# "-n" or "-e" as options instead of printing them.
+printf '%s\n' "$@" >> "$output"
+
+exit 0
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/evaluate_population_numbers.bash	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+
+if [ $# -ne 3 ]; then
+    echo "usage"
+    exit 1
+fi
+
+input_ped_file="$1"
+output_file="$2"
+max_populations="$3"
+
+ADMIXTURE=admixture
+
+for (( i=1; $i <= $max_populations; i++ )); do
+    $ADMIXTURE --cv "$input_ped_file" $i 2>&1 | grep CV | perl -ne 's/CV error/CVE/; print;' >> "$output_file"
+done
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/evaluate_population_numbers.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,83 @@
+<tool id="gd_evaluate_population_numbers" name="Population Complexity" version="1.0.0">
+  <description>: Evaluate possible numbers of ancestral populations</description>
+
+  <command interpreter="bash">
+    evaluate_population_numbers.bash "${input.extra_files_path}/admix.ped" "$output" "$max_populations"
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="gd_ped" label="Dataset" />
+    <param name="max_populations" type="integer" min="1" value="5" label="Maximum number of populations" />
+  </inputs>
+
+  <outputs>
+    <data name="output" format="txt" />
+  </outputs>
+
+  <!--
+  <tests>
+    <test>
+      <param name="input" value="fake" ftype="gd_ped" >
+        <metadata name="base_name" value="admix" />
+        <composite_data value="test_out/prepare_population_structure/prepare_population_structure.html" />
+        <composite_data value="test_out/prepare_population_structure/admix.ped" />
+        <composite_data value="test_out/prepare_population_structure/admix.map" />
+        <edit_attributes type="name" value="fake" />
+      </param>
+      <param name="max_populations" value="2" />
+
+      <output name="output" file="test_out/evaluate_population_numbers/evaluate_population_numbers.txt" />
+    </test>
+  </tests>
+  -->
+
+  <help>
+
+**Dataset formats**
+
+The input dataset is in gd_ped_ format.
+The output dataset is text.  (`Dataset missing?`_)
+
+.. _gd_ped: ./static/formatHelp.html#gd_ped
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+The user selects a gd_ped dataset generated by the Prepare Input tool.
+For all possible numbers K of ancestral
+populations, from 1 up to a user-specified maximum, this tool produces values
+that indicate how well the data can be explained as genotypes from individuals
+derived from K ancestral populations.  These values are computed by a 5-fold
+cross-validation procedure, so that a good choice for K will exhibit a low
+cross-validation error (CVE) compared with other potential settings for K.
+
+-----
+
+**Acknowledgments**
+
+We use the program "Admixture", downloaded from
+
+http://www.genetics.ucla.edu/software/admixture/
+
+and described in the paper "Fast model-based estimation of ancestry in
+unrelated individuals" by David H. Alexander, John Novembre and Kenneth Lange,
+Genome Research 19 (2009), pp. 1655-1664. Admixture is called with the "--cv"
+flag to produce these values.
+
+-----
+
+**Example**
+
+- output with max populations of 6::
+
+    CVE (K=1): 1.10120
+    CVE (K=2): 1.34683
+    CVE (K=3): 1.80611
+    CVE (K=4): 1.96339
+    CVE (K=5): 1.21522
+    CVE (K=6): 0.51501
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/extract_flanking_dna.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+
+import os
+import sys
+from optparse import OptionParser
+import genome_diversity as gd
+
+def main_function( parse_arguments=None ):
+    if parse_arguments is None:
+        parse_arguments = lambda arguments: ( None, arguments )
+    def main_decorator( to_decorate ):
+        def decorated_main( arguments=None ):
+            if arguments is None:
+                arguments = sys.argv
+            options, arguments = parse_arguments( arguments )
+            rc = 1
+            try:
+                rc = to_decorate( options, arguments )
+            except Exception, err:
+                sys.stderr.write( 'ERROR: %s\n' % str( err ) )
+                traceback.print_exc()
+            finally:
+                sys.exit( rc )
+        return decorated_main
+    return main_decorator
+
+def parse_arguments( arguments ):
+    parser = OptionParser()
+    parser.add_option('--input',
+                        type='string', dest='input',
+                        help='file of selected SNPs')
+    parser.add_option('--output',
+                        type='string', dest='output',
+                        help='output file')
+    parser.add_option('--snps_loc',
+                        type='string', dest='snps_loc',
+                        help='snps .loc file')
+    parser.add_option('--scaffold_col',
+                        type="int", dest='scaffold_col',
+                        help='scaffold column in the input file')
+    parser.add_option('--pos_col',
+                        type="int", dest='pos_col',
+                        help='position column in the input file')
+    parser.add_option('--output_format',
+                        type="string", dest='output_format',
+                        help='output format, fasta or primer3')
+    parser.add_option('--species',
+                        type="string", dest='species',
+                        help='species')
+    return parser.parse_args( arguments[1:] )
+
+
+@main_function( parse_arguments )
+def main( options, arguments ):
+    if not options.input:
+        raise RuntimeError( 'missing --input option' )
+    if not options.output:
+        raise RuntimeError( 'missing --output option' )
+    if not options.snps_loc:
+        raise RuntimeError( 'missing --snps_loc option' )
+    if not options.scaffold_col:
+        raise RuntimeError( 'missing --scaffold_col option' )
+    if not options.pos_col:
+        raise RuntimeError( 'missing --pos_col option' )
+    if not options.output_format:
+        raise RuntimeError( 'missing --output_format option' )
+    if not options.species:
+        raise RuntimeError( 'missing --species option' )
+
+    snps = gd.SnpFile( filename=options.input, seq_col=int( options.scaffold_col ), pos_col=int( options.pos_col ) )
+
+    out_fh = gd._openfile( options.output, 'w' )
+
+    snpcalls_file = gd.get_filename_from_loc( options.species, options.snps_loc )
+    file_root, file_ext = os.path.splitext( snpcalls_file )
+    snpcalls_index_file = file_root + ".cdb"
+    snpcalls = gd.SnpcallsFile( data_file=snpcalls_file, index_file=snpcalls_index_file )
+
+    while snps.next():
+        seq, pos = snps.get_seq_pos()
+        flanking_dna = snpcalls.get_flanking_dna( sequence=seq, position=pos, format=options.output_format )
+        if flanking_dna:
+            out_fh.write( flanking_dna )
+
+    out_fh.close()
+
+if __name__ == "__main__":
+    main()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/extract_flanking_dna.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,109 @@
+<tool id="gd_extract_flanking_dna" name="Flanking Sequence" version="1.0.0">
+  <description>: Fetch DNA sequence for intervals surrounding the given SNPs</description>
+
+  <command interpreter="python">
+    extract_flanking_dna.py "--input=$input" "--output=$output" "--snps_loc=${GALAXY_DATA_INDEX_DIR}/gd.snps.loc"
+    #if $override_metadata.choice == "0":
+      "--scaffold_col=${input.metadata.scaffold}" "--pos_col=${input.metadata.pos}" "--species=${input.metadata.species}"
+    #else
+      "--scaffold_col=$scaf_col" "--pos_col=$pos_col" "--species=$species"
+    #end if
+    "--output_format=$output_format"
+  </command>
+
+  <inputs>
+    <param format="tabular" name="input" type="data" label="SNP dataset"/>
+    <param name="output_format" type="select" format="integer" label="Output format">
+        <option value="fasta" selected="true">FastA format</option>
+        <option value="primer3">Boulder-IO (for Primer3)</option>
+    </param>
+    <conditional name="override_metadata">
+      <param name="choice" type="select" format="integer" label="Choose columns" help="Datasets in gd_snp format have the columns in the metadata, all others need the columns chosen." >
+        <option value="0" selected="true">No, get columns from metadata</option>
+        <option value="1" >Yes, choose columns</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <param name="scaf_col" type="data_column" data_ref="input" numerical="false" label="Column with scaffold"/>
+        <param name="pos_col" type="data_column" data_ref="input" numerical="true" label="Column with position"/>
+        <param name="species" type="select" label="Choose species">
+          <options from_file="gd.species.txt">
+            <column name="name" index="1"/>
+            <column name="value" index="0"/>
+          </options>
+        </param>
+      </when>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <data format="txt" name="output"/>
+  </outputs>
+
+  <!-- Need snpcalls files from Webb before uncommenting
+  <tests>
+    <test>
+      <param name="input" value="test_out/select_snps/select_snps.gd_snp" ftype="gd_snp" />
+      <param name="output_format" value="primer3" />
+      <param name="choice" value="0" />
+      <output name="output" file="test_out/extract_flanking_dna/extract_flanking_dna.txt" />
+    </test>
+  </tests>
+  -->
+
+  <help>
+
+**Dataset formats**
+
+The input dataset is in tabular_ format and must contain a scaffold or
+chromosome column and a position column.  The output is in fasta_ format or
+Boulder-IO_ format used by Primer3.
+(`Dataset missing?`_)
+
+.. _tabular: ./static/formatHelp.html#tab
+.. _fasta: ./static/formatHelp.html#fasta
+.. _Boulder-IO: ./static/formatHelp.html#boulder
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+This tool reports a DNA segment containing each SNP, with up to 200 nucleotides
+on either side of the SNP position, which is indicated by "n". Fewer nucleotides
+are reported if the SNP is near an end of the assembled genome fragment.
+
+-----
+
+**Example**
+
+- input (gd_snp format)::
+
+    chr2_75111355_75112576    314  A  C  L  F  chr2   75111676  C  F  15  4  53   2   9  48   Y  96   0.369  0.355  0.396  0
+    chr8_93901796_93905612   2471  A  C  A  A  chr8   93904264  A  A  8   0  51   10  2  14   Y  961  0.016  0.534  0.114  2
+    chr10_7434473_7435447    524   T  C  S  S  chr10  7435005   T  S  11  5  90   14  0  69   Y  626  0.066  0.406  0.727  0
+    chr14_80021455_80022064  138   G  A  H  H  chr14  80021593  G  H  14  0  69   9   6  124  Y  377  0.118  0.997  0.195  1
+    chr15_64470252_64471048  89    G  A  Y  Y  chr15  64470341  G  Y  5   6  109  14  0  69   Y  312  0.247  0.998  0.393  0
+    chr18_48070585_48071386  514   C  T  E  K  chr18  48071100  T  K  7   7  46   14  0  69   Y  2    0.200  0.032  0.163  0
+    chr18_50154905_50155664  304   A  G  Y  C  chr18  50155208  A  Y  4   2  17   5   1  22   Y  8    0.022  0.996  0.128  0
+    chr18_57379354_57380496  315   C  T  V  V  chr18  57379669  G  V  11  0  60   9   6  62   Y  726  0.118  0.048  0.014  1
+    chr19_14240610_14242055  232   C  T  A  V  chr19  14240840  C  A  18  8  56   15  5  42   Y  73   0.003  0.153  0.835  0
+    chr19_39866997_39874915  3117  C  T  P  P  chr19  39870110  C  P  3   7  65   14  2  32   Y  6    0.321  0.911  0.462  4
+    etc.
+
+- output (FastA format)::
+
+    > chr2_75111355_75112576 314 A C
+    TATCTTCATTTTTATTATAGACTCTCTGAACCAATTTGCCCTGAGGCAGACTTTTTAAAGTACTGTGTAATGTATGAAGTCCTTCTGCTCAAGCAAATCATTGGCATGAAAACAGTTGCAAACTTATTGTGAGAGAAGAGTCCAAGAGTTTTAACAGTCTGTAAGTATATAGCCTGTGAGTTTGATTTCCTTCTTGTTTTTnTTCCAGAAACATGATCAGGGGCAAGTTCTATTGGATATAGTCTTCAAGCATCTTGATTTGACTGAGCGTGACTATTTTGGTTTGCAGTTGACTGACGATTCCACTGATAACCCAGTAAGTTTAAGCTGTTGTCTTTCATTGTCATTGCAATTTTTCTGTCTTTATACTAGGTCCTTTCTGATTTACATTGTTCACTGATT
+    > chr8_93901796_93905612 2471 A C
+    GCTGCCGCTGGATTTACTTCTGCTTGGGTCGAGAGCGGGCTGGATGGGTGAAGAGTGGGCTCCCCGGCCCCTGACCAGGCAGGTGCAGACAAGTCGGAAGAAGGCCCGCCGCATCTCCTTGCTGGCCAGCGTGTAGATGACGGGGTTCATGGCAGAGTTGAGCACGGCCAGCACGATGAACCACTGGGCCTTGAACAGGATnGCGCACTCCTTCACCTTGCAGGCCACATCCACAAGGAAAAGGATGAAGAGTGGGGACCAGCAGGCGATGAACACGCTCACCACGATCACCACGGTCCGCAGCAGGGCCATGGACCGCTCTGAGTTGTGCGGGCTGGCCACCCTGCGGCTGCTGGACTTCACCAGGAAGTAGATGCGTGCGTACAGGATCACGATGGTCAC
+    > chr10_7434473_7435447 524 T C
+    ATTATTAACAGAAACATTTCTTTTTCATTACCCAGGGGTTACACTGGTCGTTGATGTTAATCAGTTTTTGGAGAAGGAGAAGCAAAGTGATATTTTGTCTGTTCTGAAGCCTGCCGTTGGTAATACAAATGACGTAATCCCTGAATGTGCTGACAGGTACCATGACGCCCTGGCAAAAGCAAAAGAGCAAAAATCTAGAAGnGGTAAGCATCTTCACTGTTTAGCACAAATTAAATAGCACTTTGAATATGATGATTTCTGTGGTATTGTGTTATCTTACTTTTGAGACAAATAATCGCTTTCAAATGAATATTTCTGAATGTTTGTCATCTCTGGCAAGGAAATTTTTTAGTGTTTCTTTTCCTTTTTTGTCTTTTGGAAATCTGTGATTAACTTGGTGGC
+    > chr14_80021455_80022064 138 G A
+    ACCCAGGGATCAAACCCAGGTCTCCCGCATTGCAGGCGGATTCTTTACTGTCTGAGCCTCCAGGGAAGCCCTCGGGGCTGAAGGGATGGTTATGAAGGTGAGAAACAGGGGCCACCTGTCCCCAAGGTACCTTGCGACnTGCCATCTGCGCTCCACCAGTAAATGGACGTCTTCGATCCTTCTGTTGTTGGCGTAGTGCAAACGTTTGGGAAGGTGCTGTTTCAAGTAAGGCTTAAAGTGCTGGTCTGGTTTTTTACACTGAAATATAAATGGACATTGGATTTTGCAATGGAGAGTCTTCTAGAAGAGTCCAAGACATTCTCTCCAGAAAGCTGAAGG
+    > chr15_64470252_64471048 89 G A
+    TGTGTGTGTGTGTGTGTGTGTGTGCCTGTGTCTGTACATGCACACCACGTGGCCTCACCCAGTGCCCTCAGCTCCATGGTGATGTCCACnTAGCCGTGCTCCGCGCTGTAGTACATGGCCTCCTGGAGGGCCTTGGTGCGCGTCCGGCTCAGGCGCATGGGCCCCTCGCTGCCGCTGCCCTGGCTGGATGCATCGCTCTCTTCCACGCCCTCAGCCAGGATCTCCTCCAGGGACAGCACATCTGCTTTGGCCTGCTGTGGCTGAGTCAGGAGCTTCCTCAGGACGTTCCT
+    etc.
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/extract_primers.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+
+import os
+import sys
+from optparse import OptionParser
+import genome_diversity as gd
+
+def main_function( parse_arguments=None ):
+    if parse_arguments is None:
+        parse_arguments = lambda arguments: ( None, arguments )
+    def main_decorator( to_decorate ):
+        def decorated_main( arguments=None ):
+            if arguments is None:
+                arguments = sys.argv
+            options, arguments = parse_arguments( arguments )
+            rc = 1
+            try:
+                rc = to_decorate( options, arguments )
+            except Exception, err:
+                sys.stderr.write( 'ERROR: %s\n' % str( err ) )
+                traceback.print_exc()
+            finally:
+                sys.exit( rc )
+        return decorated_main
+    return main_decorator
+
+def parse_arguments( arguments ):
+    parser = OptionParser()
+    parser.add_option('--input',
+                        type='string', dest='input',
+                        help='file of selected SNPs')
+    parser.add_option('--output',
+                        type='string', dest='output',
+                        help='output file')
+    parser.add_option('--primers_loc',
+                        type='string', dest='primers_loc',
+                        help='primers .loc file')
+    parser.add_option('--scaffold_col',
+                        type="int", dest='scaffold_col',
+                        help='scaffold column in the input file')
+    parser.add_option('--pos_col',
+                        type="int", dest='pos_col',
+                        help='position column in the input file')
+    parser.add_option('--species',
+                        type="string", dest='species',
+                        help='species')
+    return parser.parse_args( arguments[1:] )
+
+
+@main_function( parse_arguments )
+def main( options, arguments ):
+    if not options.input:
+        raise RuntimeError( 'missing --input option' )
+    if not options.output:
+        raise RuntimeError( 'missing --output option' )
+    if not options.primers_loc:
+        raise RuntimeError( 'missing --primers_loc option' )
+    if not options.scaffold_col:
+        raise RuntimeError( 'missing --scaffold_col option' )
+    if not options.pos_col:
+        raise RuntimeError( 'missing --pos_col option' )
+    if not options.species:
+        raise RuntimeError( 'missing --species option' )
+
+    snps = gd.SnpFile( filename=options.input, seq_col=int( options.scaffold_col ), pos_col=int( options.pos_col ) )
+
+    out_fh = gd._openfile( options.output, 'w' )
+
+    primer_data_file = gd.get_filename_from_loc( options.species, options.primers_loc )
+
+    file_root, file_ext = os.path.splitext( primer_data_file )
+    primer_index_file = file_root + ".cdb"
+    primers = gd.PrimersFile( data_file=primer_data_file, index_file=primer_index_file )
+
+    while snps.next():
+        seq, pos = snps.get_seq_pos()
+        primer = primers.get_entry( seq, pos )
+        if primer:
+            out_fh.write( primer )
+
+    out_fh.close()
+
+if __name__ == "__main__":
+    main()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/extract_primers.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,106 @@
+<tool id="gd_extract_primers" name="Pick Primers" version="1.0.0">
+  <description>: Find suitable PCR primers for SNPs</description>
+
+  <command interpreter="python">
+    extract_primers.py "--input=$input" "--output=$output" "--primers_loc=${GALAXY_DATA_INDEX_DIR}/gd.primers.loc"
+    #if $override_metadata.choice == "0":
+      "--scaffold_col=${input.metadata.scaffold}" "--pos_col=${input.metadata.pos}" "--species=${input.metadata.species}"
+    #else
+      "--scaffold_col=$scaf_col" "--pos_col=$pos_col" "--species=$species"
+    #end if
+  </command>
+
+  <inputs>
+    <param format="tabular" name="input" type="data" label="SNP dataset"/>
+    <conditional name="override_metadata">
+      <param name="choice" type="select" format="integer" label="Choose columns" help="Datasets in gd_snp format have the columns in the metadata, all others need the columns chosen." >
+        <option value="0" selected="true">No, get columns from metadata</option>
+        <option value="1" >Yes, choose columns</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <param name="scaf_col" type="data_column" data_ref="input" numerical="false" label="Column with scaffold"/>
+        <param name="pos_col" type="data_column" data_ref="input" numerical="true" label="Column with position"/>
+        <param name="species" type="select" label="Choose species">
+          <options from_file="gd.species.txt">
+            <column name="name" index="1"/>
+            <column name="value" index="0"/>
+          </options>
+        </param>
+      </when>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <data format="txt" name="output"/>
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="input" value="test_out/select_snps/select_snps.gd_snp" ftype="gd_snp" />
+      <param name="choice" value="0"/>
+      <output name="output" file="test_out/extract_primers/extract_primers.txt" />
+    </test>
+  </tests>
+
+
+  <help>
+
+**Dataset formats**
+
+The input dataset is in tabular_ format and must contain a scaffold or
+chromosome column and a position column.  The output dataset is in text_
+format as described below.
+(`Dataset missing?`_)
+
+.. _tabular: ./static/formatHelp.html#tab
+.. _text: ./static/formatHelp.html#text
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+This tool extracts primers for SNPs in the dataset using the Primer3 program
+(Steve Rozen and Helen J. Skaletsky, 2000).
+The first line of output for a given SNP reports the name of the assembled
+contig, the SNP's position in the contig, the two variant nucleotides, and
+Primer3's "pair penalty".  The next line, if not blank, names restriction
+enzymes (from the user-adjustable list) that differentially cut at that
+site, but do not cut at any other position between and including the
+primer positions.  The next lines show the SNP's flanking regions, with
+the SNP position indicated by "n", including the primer positions and an
+additional 3 nucleotides.
+<!-- is this precomputed?? how, where is the user-adjustable list? -->
+
+-----
+
+**Example**
+
+- input (gd_snp format)::
+
+    chr5_30800874_30802049    734   G  A  chr5   30801606   A  24  0  99   4  11  97   Y  496  0.502  0.033  0.215  6
+    chr8_55117827_55119487    994   A  G  chr8   55118815   G  25  0  102  4  11  96   Y  22   0.502  0.025  2.365  1
+    chr9_100484836_100485311  355   C  T  chr9   100485200  T  27  0  108  6  17  100  Y  190  0.512  0.880  2.733  4
+    chr12_3635530_3637738     2101  T  C  chr12  3637630    T  25  0  102  4  13  93   Y  169  0.554  0.024  0.366  4
+    etc.
+
+- output::
+
+    chr5_30800874_30802049 734 G A 0.352964
+     BglII,MboI,Sau3AI,Tru9I,XhoII
+      1 CTGAAGGTGAGCAGGATTCAGGAGACAGAAAACAAAGCCCAGGCCTGCCCAAGGTGGAAA
+           >>>>>>>>>>>>>>>>>>>>
+
+     61 AGTCTAACAACTCGCCCTCTGCTTAnATCTGAGACTCACAGGGATAATAACACACTTGGT
+
+
+     21 CAAGGAATAAACTAGATATTATTCACTCCTCTAGAAGGCTGCCAGGAAAATTGCCTGACT
+                                                             &lt;&lt;&lt;&lt;&lt;&lt;&lt;
+
+    181 TGAACCTTGGCTCTGA
+        &lt;&lt;&lt;&lt;&lt;&lt;&lt;&lt;&lt;&lt;&lt;&lt;&lt;
+    etc.
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/filter_gd_snp.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+
+import gd_util
+import sys
+from Population import Population
+
+################################################################################
+
+def convert_percent(string_value):
+    if string_value.endswith('%'):
+        val = convert_non_negative_int(string_value[:-1])
+        if val > 100:
+            print >> sys.stderr, 'percentage: "%d" > 100' % val
+            sys.exit(1)
+        val = val * -1
+    else:
+        val = convert_non_negative_int(string_value)
+
+    return str(val)
+
+def convert_non_negative_int(string_value):
+    try:
+        val = int(string_value)
+    except:
+        print >> sys.stderr, '"%s" is not an integer' % string_value
+        sys.exit(1)
+
+    if val < 0:
+        print >> sys.stderr, '"%d" is negative' % val
+        sys.exit(1)
+
+    return val
+
+################################################################################
+
+if len(sys.argv) != 13:
+    gd_util.die('Usage')
+
+input, output, ref_chrom_col, min_spacing, lo_genotypes, p1_input, input_type, lo_coverage, hi_coverage, low_ind_cov, low_quality, ind_arg = sys.argv[1:]
+
+p_total = Population()
+p_total.from_wrapped_dict(ind_arg)
+
+p1 = Population()
+p1.from_population_file(p1_input)
+
+if not p_total.is_superset(p1):
+    gd_util.die('There is an individual in the population that is not in the SNP table')
+
+lo_coverage = convert_percent(lo_coverage)
+hi_coverage = convert_percent(hi_coverage)
+
+if input_type == 'gd_snp':
+    type_arg = 1
+elif input_type == 'gd_genotype':
+    type_arg = 0
+else:
+    gd_util.die('unknown input_type: {0}'.format(input_type))
+
+################################################################################
+
+prog = 'filter_snps'
+
+args = [ prog ]
+args.append(input)          # file containing a Galaxy table
+args.append(type_arg)       # 1 for a gd_snp file, 0 for gd_genotype
+args.append(lo_coverage)    # lower bound on total coverage (< 0 means interpret as percentage)
+args.append(hi_coverage)    # upper bound on total coveraae (< 0 means interpret as percentage)
+args.append(low_ind_cov)    # lower bound on individual coverage
+args.append(low_quality)    # lower bound on individual quality value
+args.append(lo_genotypes)   # lower bound on the number of defined genotypes
+args.append(min_spacing)    # lower bound on the spacing between SNPs
+args.append(ref_chrom_col)  # reference-chromosome column (base-1); ref position in next column
+
+columns = p1.column_list()
+for column in sorted(columns):
+    args.append(column)     # the starting columns (base-1) for the chosen individuals
+
+with open(output, 'w') as fh:
+    gd_util.run_program(prog, args, stdout=fh)
+
+sys.exit(0)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/filter_gd_snp.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,146 @@
+<tool id="gd_filter_gd_snp" name="Filter SNPs" version="1.2.0">
+  <description>: Discard some SNPs based on coverage, quality or spacing</description>
+
+  <command interpreter="python">
+    #import json
+    #import base64
+    #import zlib
+    #set $ind_names = $input.dataset.metadata.individual_names
+    #set $ind_colms = $input.dataset.metadata.individual_columns
+    #set $ind_dict = dict(zip($ind_names, $ind_colms))
+    #set $ind_json = json.dumps($ind_dict, separators=(',',':'))
+    #set $ind_comp = zlib.compress($ind_json, 9)
+    #set $ind_arg = base64.b64encode($ind_comp)
+    filter_gd_snp.py '$input' '$output'
+    #if str($input.dataset.metadata.dbkey) == '?'
+      '0'
+    #else
+      '$input.dataset.metadata.ref'
+    #end if
+    '$min_spacing' '$lo_genotypes' '$input_type.p1_input'
+    #if $input_type.choice == '0'
+      'gd_snp' '$input_type.lo_coverage' '$input_type.hi_coverage' '$input_type.low_ind_cov' '$input_type.lo_quality'
+    #else if $input_type.choice == '1'
+      'gd_genotype' '0' '0' '0' '0'
+    #end if
+    '$ind_arg'
+  </command>
+
+  <inputs>
+    <conditional name="input_type">
+      <param name="choice" type="select" format="integer" label="Input format">
+        <option value="0" selected="true">gd_snp</option>
+        <option value="1">gd_genotype</option>
+      </param>
+      <when value="0">
+        <param name="input" type="data" format="gd_snp" label="SNP dataset" />
+        <param name="p1_input" type="data" format="gd_indivs" label="Population individuals" />
+        <param name="lo_coverage" type="text" value="0" label="Lower bound on total coverage">
+          <sanitizer>
+            <valid initial="string.digits">
+              <!-- &#37; is the percent (%) character -->
+              <add value="&#37;" />
+            </valid>
+          </sanitizer>
+        </param>
+        <param name="hi_coverage" type="text" value="1000" label="Upper bound on total coverage">
+          <sanitizer>
+            <valid initial="string.digits">
+              <!-- &#37; is the percent (%) character -->
+              <add value="&#37;" />
+            </valid>
+          </sanitizer>
+        </param>
+        <param name="low_ind_cov" type="integer" min="0" value="0" label="Lower bound on individual coverage" />
+        <param name="lo_quality" type="integer" min="0" value="0" label="Lower bound on individual quality values" />
+      </when>
+      <when value="1">
+        <param name="input" type="data" format="gd_genotype" label="Genotype dataset" />
+        <param name="p1_input" type="data" format="gd_indivs" label="Population individuals" />
+      </when>
+    </conditional>
+    <param name="min_spacing" type="integer" min="0" value="0" label="Minimum spacing between SNPs" />
+    <param name="lo_genotypes" type="integer" min="0" value="0" label="Lower bound on the number of defined genotypes" />
+  </inputs>
+
+  <outputs>
+    <data name="output" format="input" format_source="input" metadata_source="input" />
+  </outputs>
+
+  <requirements>
+    <requirement type="package" version="0.1">gd_c_tools</requirement>
+  </requirements>
+
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
+      <param name="p1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" />
+      <param name="lo_coverage" value="0" />
+      <param name="hi_coverage" value="1000" />
+      <param name="low_ind_cov" value="3" />
+      <param name="lo_quality" value="30" />
+      <output name="output" file="test_out/modify_snp_table/modify.gd_snp" />
+    </test>
+  </tests>
+
+  <help>
+
+**Dataset formats**
+
+The input datasets are in gd_snp_, gd_genotype_, and gd_indivs_ formats.
+The output dataset is in gd_snp_ or gd_genotype_ format.  (`Dataset missing?`_)
+
+.. _gd_snp: ./static/formatHelp.html#gd_snp
+.. _gd_genotype: ./static/formatHelp.html#gd_genotype
+.. _gd_indivs: ./static/formatHelp.html#gd_indivs
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+For a gd_snp dataset, the user specifies that some of the individuals
+form a "population", by supplying a list that has been previously created
+using the Specify Individuals tool.  SNPs are then discarded if their
+total coverage for the population is too low or too high, or if their
+coverage or quality score for any individual in the population is too low.
+
+The upper and lower bounds on total population coverage can be specified
+either as read counts or as percentiles (e.g. "5%", with no decimal
+places).  For percentile bounds the SNPs are ranked by read count, so
+for example, a lower bound of "10%" means that the least-covered 10%
+of the SNPs will be discarded, while an upper bound of, say, "80%" will
+discard all SNPs above the 80% mark, i.e. the top 20%.  The threshold
+for the lower bound on individual coverage can only be specified as a
+plain read count.
+
+For either a gd_snp or gd_genotype dataset, the user can specify a
+minimum number of defined genotypes (i.e., not -1) and/or a minimum
+spacing relative to the reference sequence.  An error is reported if the
+user requests a minimum spacing but no reference sequence is available.
+
+-----
+
+**Example**
+
+- input gd_snp::
+
+    Contig161_chr1_4641264_4641879   115  C  T  73.5   chr1   4641382  C   6  0  2  45   8  0  2  51   15  0  2  72   5  0  2  42   6  0  2  45   10  0  2  57   Y  54  0.323  0
+    Contig48_chr1_10150253_10151311   11  A  G  94.3   chr1  10150264  A   1  0  2  30   1  0  2  30    1  0  2  30   3  0  2  36   1  0  2  30    1  0  2  30   Y  22  +99.   0
+    Contig20_chr1_21313469_21313570   66  C  T  54.0   chr1  21313534  C   4  0  2  39   4  0  2  39    5  0  2  42   4  0  2  39   4  0  2  39    5  0  2  42   N   1  +99.   0
+    etc.
+
+- input individuals::
+
+    9   PB1
+    13  PB2
+    17  PB3
+
+- output when the lower bound on individual coverage is "3"::
+
+    Contig161_chr1_4641264_4641879   115  C  T  73.5   chr1   4641382  C   6  0  2  45   8  0  2  51   15  0  2  72   5  0  2  42   6  0  2  45   10  0  2  57   Y  54  0.323  0
+    Contig20_chr1_21313469_21313570   66  C  T  54.0   chr1  21313534  C   4  0  2  39   4  0  2  39    5  0  2  42   4  0  2  39   4  0  2  39    5  0  2  42   N   1  +99.   0
+    etc.
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/find_intervals.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+
+import errno
+import os
+import subprocess
+import sys
+
+################################################################################
+
+def mkdir_p(path):
+    try:
+        os.makedirs(path)
+    except OSError, e:
+        if e.errno <> errno.EEXIST:
+            raise
+
+def run_program(prog, args, stdout_file=None):
+    #print "args:", ' '.join(args)
+    p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    (stdoutdata, stderrdata) = p.communicate()
+    rc = p.returncode
+
+    if stdout_file is not None:
+        with open(stdout_file, 'w') as ofh:
+            print >> ofh, stdoutdata.rstrip('\r\n')
+
+    if rc != 0:
+        print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args))
+        print >> sys.stderr, stderrdata
+        sys.exit(1)
+
+################################################################################
+
+# Driver: run the external "sweep" program on the input table.  When
+# individual positions were requested, the text output is additionally turned
+# into a bedGraph file plus a links file and handed to bedGraphToBigWig.
+
+# Exactly ten positional arguments must follow the script name.
+if len(sys.argv) != 11:
+    print "usage"
+    sys.exit(1)
+
+input, dbkey, output, output_files_path, chrom_col, pos_col, score_col, shuffles, cutoff, report_snps = sys.argv[1:11]
+
+prog = 'sweep'
+
+# Positional arguments for sweep; note that cutoff precedes shuffles here
+# although shuffles precedes cutoff on this script's own command line.
+args = [ prog ]
+args.append(input)
+args.append(chrom_col)
+args.append(pos_col)
+args.append(score_col)
+args.append(cutoff)
+args.append(shuffles)
+args.append(report_snps)
+
+# The executable is passed as None, so subprocess runs args[0].
+run_program(None, args, stdout_file=output)
+
+# Nothing more to do unless individual positions were requested.
+if report_snps == "0":
+    sys.exit(0)
+
+################################################################################
+
+mkdir_p(output_files_path)
+
+# The bedGraph file is written to the current working directory; the links
+# file goes into the output's files directory.
+bedgraph_filename = 'bedgraph.txt'
+links_filename = os.path.join(output_files_path, 'links.txt')
+
+data = []
+links_data = []
+
+# Parse the sweep output just written: a line that does not start with a space
+# is a tab-separated interval record; a line that starts with a space is a
+# position/value pair belonging to the most recent interval record's chromosome.
+with open(output) as fh:
+    chrom = None
+    for line in fh:
+        line = line.rstrip('\r\n')
+        if not line:
+            continue
+        if line[0] != ' ':
+            # chrom line, add a link
+            chrom, interval_begin, interval_end, interval_value = line.split('\t')
+            links_data.append((chrom, int(interval_begin), int(interval_end)))
+        else:
+            # data line, add a bedgraph line
+            begin, value = line.split()
+            data.append((chrom, int(begin), value))
+
+# Each position becomes a one-base-wide record (begin, begin+1), sorted by
+# chromosome name and then by position.
+with open(bedgraph_filename, 'w') as ofh:
+    print >> ofh, 'track type=bedGraph'
+    for chrom, begin, value in sorted(data):
+        print >> ofh, chrom, begin, begin+1, value
+
+with open(links_filename, 'w') as ofh:
+    for chrom, begin, end in sorted(links_data):
+        print >> ofh, chrom, begin, end
+
+################################################################################
+
+# The standard output of fetchChromSizes is saved as <dbkey>.chrom.sizes in the
+# current working directory.
+chrom_sizes_filename = '{0}.chrom.sizes'.format(dbkey)
+
+prog = 'fetchChromSizes'
+
+args = [ prog ]
+args.append(dbkey)
+
+run_program(None, args, stdout_file=chrom_sizes_filename)
+
+################################################################################
+
+# output is passed as the last argument, so the sweep text written to it
+# earlier is presumably replaced by the bigWig file -- NOTE(review): confirm
+# against the bedGraphToBigWig usage (in.bedGraph chrom.sizes out.bw).
+prog = 'bedGraphToBigWig'
+
+args = [ prog ]
+args.append(bedgraph_filename)
+args.append(chrom_sizes_filename)
+args.append(output)
+
+run_program(None, args)
+
+################################################################################
+
+sys.exit(0)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/find_intervals.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,162 @@
+<tool id="gd_find_intervals" name="Remarkable Intervals" version="1.1.0">
+  <description>: Find high-scoring runs of SNPs</description>
+
+  <command interpreter="python">
+    find_intervals.py "$input" "$input.metadata.dbkey" "$output" "$output.files_path"
+
+    #if $override_metadata.choice == "0"
+      "$input.metadata.ref" "$input.metadata.rPos"
+    #else
+      "$override_metadata.ref_col" "$override_metadata.rpos_col"
+    #end if
+
+    "$score_col" "$shuffles"
+
+    #if $cutoff.type == 'percentage'
+      "$cutoff.cutoff_pct"
+    #else
+      "=$cutoff.cutoff_val"
+    #end if
+
+    "$out_format"
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="tabular" label="Dataset">
+      <validator type="unspecified_build" message="This dataset does not have a reference species and cannot be used with this tool" />
+    </param>
+
+    <param name="score_col" type="data_column" data_ref="input" numerical="true" label="Column with score"/>
+
+    <conditional name="cutoff">
+      <param name="type" type="select" label="Score-shift type">
+        <option value="percentage">percentage</option>
+        <option value="value">value</option>
+      </param>
+      <when value="percentage">
+        <param name="cutoff_pct" type="float" value="95" min="0" max="100" label="Percentage score-shift"/>
+      </when>
+      <when value="value">
+        <param name="cutoff_val" type="float" value="0.0" label="Value score-shift"/>
+      </when>
+    </conditional>
+
+    <param name="shuffles" type="integer" min="0" value="0" label="Number of randomizations"/>
+
+    <param name="out_format" type="select" format="integer" label="Report individual positions">
+      <option value="0" selected="true">no</option>
+      <option value="1">yes</option>
+    </param>
+
+    <conditional name="override_metadata">
+      <param name="choice" type="select" format="integer" label="Choose columns" help="Note: you must choose the columns if the input dataset is neither gd_snp nor gd_genotype.">
+        <option value="0" selected="true">no, get columns from metadata</option>
+        <option value="1" >yes, choose columns here</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <param name="ref_col" type="data_column" data_ref="input" numerical="false" label="Column with reference chromosome" help="Note: be sure this corresponds to the build recorded in the metadata."/>
+        <param name="rpos_col" type="data_column" data_ref="input" numerical="true" label="Column with reference position" help="Note: either zero-based or one-based positions will work."/>
+      </when>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <data name="output" format="interval">
+        <change_format>
+            <when input="out_format" value="1" format="bigwigpos" />
+        </change_format>
+    </data>
+  </outputs>
+
+  <requirements>
+    <requirement type="package" version="0.1">gd_c_tools</requirement>
+  </requirements>
+
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
+      <param name="score_col" value="5" />
+      <param name="type" value="value" />
+      <param name="cutoff_val" value="700.0" />
+      <param name="shuffles" value="10" />
+      <param name="out_format" value="0" />
+      <param name="choice" value="0" />
+
+      <output name="output" file="test_out/find_intervals/find_intervals.interval" />
+    </test>
+  </tests>
+
+  <help>
+**Dataset formats**
+
+The input dataset is tabular_ (which includes gd_snp_ and gd_genotype_),
+with required columns of chromosome, position, and score (in any column).
+The output dataset is interval_. (`Dataset missing?`_)
+
+.. _tabular: ./static/formatHelp.html#tab
+.. _gd_snp: ./static/formatHelp.html#gd_snp
+.. _gd_genotype: ./static/formatHelp.html#gd_genotype
+.. _interval: ./static/formatHelp.html#interval
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+The user selects a tabular dataset (such as the SNV formats gd_snp and
+gd_genotype) and if the dataset is not in an SNV format, specifies the
+columns containing chromosome, position, and scores (such as an FST-value
+for the SNP).  With SNV formats, the metadata tells which columns hold the
+chromosome and position.  Other inputs include a percentage or raw score
+for the "score-shift" which should be greater than the average value
+for the scores column.  A higher value will give smaller intervals in
+the output.  If a percentage (e.g. 95%) is specified then that percentile
+of the scores is used as the shift; percentile may not work well if many
+rows or SNPs have the same score (in that case use a raw score).
+
+The program subtracts the shift from every score, then finds genomic
+intervals (i.e., consecutive runs of SNPs) whose total score cannot be
+increased by adding or subtracting one or more adjusted scores at the
+ends of the interval.  Another input is the number of times the data
+should be randomized (only intervals with score exceeding the maximum
+for the randomized data are reported).  If 100 shuffles are requested,
+then any interval reported by the tool has a score with probability
+less than 0.01 of being equaled or exceeded by chance, assuming that
+the scores vary independently by position.
+
+-----
+
+**Example**
+
+- Input (showing only the chromosome, position, and score columns)::
+
+    chr2      39      0.40
+    chr2     103      0.97
+    chr2     188      0.72
+    chr2     203      0.68
+    chr2     321      0.92
+    ...
+    chr2    1132      0.85
+    chr2    1321      0.34
+    ...
+
+- Suppose the user-specified score-shift is 0.75.  This value is subtracted from each score, giving::
+
+    chr2      39     -0.35
+    chr2     103      0.22
+    chr2     188     -0.03
+    chr2     203     -0.07
+    chr2     321      0.17
+    ...
+    chr2    1132      0.10
+    chr2    1321     -0.41
+    ...
+
+- The output, not reporting individual positions, might be (depending on the values not shown above)::
+
+    chr2    103    1132    1.42
+  </help>
+</tool>
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/gd_composite.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,128 @@
+#!/usr/bin/env python
+
+from galaxy import eggs
+import pkg_resources
+pkg_resources.require( "Cheetah" )
+from Cheetah.Template import Template
+
+import errno
+import os
+from datetime import datetime
+
+################################################################################
+
+def die(message):
+    print >> sys.stderr, message
+    sys.exit(1)
+
+def mkdir_p(path):
+    try:
+        os.makedirs(path)
+    except OSError, e:
+        if e.errno <> errno.EEXIST:
+            raise
+
+################################################################################
+
+class Display(object):
+    """Default display strategy used by Parameter when none is supplied."""
+    def display(self, parameter):
+        # Unlike the subclasses defined below, which return an HTML fragment,
+        # this prints the parameter and implicitly returns None.
+        print parameter
+
+class DisplayFile(Display):
+    """Display strategy that renders a parameter as an HTML link."""
+    def display(self, parameter):
+        # parameter.value becomes the link target and parameter.name the link
+        # text; neither is HTML-escaped here.
+        return '<a href="{0}">{1}</a>'.format(parameter.value, parameter.name)
+
+class DisplayValue(Display):
+    def display(self, parameter):
+        if parameter.value is not None:
+            return '{0}: {1}'.format(parameter.description, parameter.value)
+        else:
+            return '{0}'.format(parameter.description)
+
+class DisplayTagList(Display):
+    def display(self, parameter):
+        rv = []
+        if parameter.name:
+            rv.append(parameter.name)
+        rv.append('<ol>')
+        for tag in parameter.value:
+            col, individual_name = tag.split(':')
+            rv.append('<li>{0}</li>'.format(individual_name))
+        rv.append('</ol>')
+        return '\n'.join(rv)
+
+class DisplayPopulationList(Display):
+    def display(self, parameter):
+        rv = []
+        rv.append('Populations')
+        rv.append('<ul>')
+        for population in parameter.value:
+            rv.append('<li>')
+            if population.name is not None:
+                rv.append(population.name)
+            rv.append('<ol>')
+            for name in population.individual_names():
+                rv.append('<li>{0}</li>'.format(name))
+            rv.append('</ol>')
+            rv.append('</li>')
+        rv.append('</ul>')
+        return '\n'.join(rv)
+
+#    def display(self, parameter, name=''):
+#        print '<ul> {0}'.format(name)
+#        for individual_name in parameter.individual_names():
+#            print '<li>{0}>/li>'.format(individual_name)
+#        print '</ul>'
+
+
+class Parameter(object):
+    def __init__(self, name=None, value=None, description=None, display_type=None):
+        self.name = name
+        self.value = value
+        self.description = description
+        if display_type is None:
+            self.display_type = Display()
+        else:
+            self.display_type = display_type
+
+    def display(self):
+        return self.display_type.display(self)
+
+class InfoPage(object):
+    """Collects the data shown on a composite dataset's HTML info page.
+
+    The page text is read from gd_composite_template.html, which is looked up
+    in the directory that contains this script (after resolving symlinks).
+    """
+    _realpath = os.path.realpath(__file__)
+    _script_dir = os.path.dirname(_realpath)
+    template_file = os.path.join(_script_dir, 'gd_composite_template.html')
+    def __init__(self):
+        """Record the creation time, set defaults and load the template text."""
+        # 12-hour clock with an AM/PM marker, local time.
+        self.timestamp = datetime.now().strftime('%Y-%m-%d %I:%M:%S %p')
+        self.title = 'Genome Diversity Composite Dataset'
+        self.inputs = []
+        self.outputs = []
+        self.misc = ''
+        self.template = self.load_template()
+
+    def load_template(self):
+        """Return the contents of template_file without trailing CR/LF characters."""
+        with open(self.template_file) as f:
+            return f.read().rstrip('\r\n')
+
+    def set_title(self, title):
+        """Replace the default page title."""
+        self.title = title
+
+    def add_input_parameter(self, parameter):
+        """Append parameter to the list of inputs."""
+        self.inputs.append(parameter)
+
+    def add_output_parameter(self, parameter):
+        """Append parameter to the list of outputs."""
+        self.outputs.append(parameter)
+
+    def add_misc(self, misc):
+        """Set the misc entry; despite the name, this replaces any earlier value."""
+        self.misc = misc
+
+    def render(self):
+        """Return a Cheetah Template built from the loaded template text.
+
+        This object is made available to the template under the name 'tool'.
+        """
+        return Template(self.template, searchList=[{'tool': self}])
+
+
+
+
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/gd_composite_template.html	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,40 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <meta http-equiv="Content-type" content="text/html; charset=UTF-8" />
+    <link rel="stylesheet" href="/static/style/base.css" type="text/css" />
+    <title>${tool.title}</title>
+  </head>
+  <body>
+    <div class="document">
+      Output completed: $tool.timestamp
+      <p/>
+      #if $tool.outputs
+      <div id="gd_outputs">
+        Outputs
+        <ul>
+          #for output in $tool.outputs
+            <li>${output.display()}</li>
+          #end for
+        </ul>
+      </div>
+      #end if
+      #if $tool.inputs
+      <div id="gd_inputs">
+        Inputs
+        <ul>
+          #for input in $tool.inputs
+            <li>${input.display()}</li>
+          #end for
+        </ul>
+      </div>
+      #end if
+      #if $tool.misc
+      <div id="gd_misc">
+        $tool.misc.display()
+      </div>
+      #end if
+    </div>
+  </body>
+</html>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/gd_snp2vcf.pl	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,221 @@
+#!/usr/bin/perl -w
+use strict;
+
+#convert from gd_snp file to vcf file (with dbSNP fields)
+
+#gd_snp table format:
+#1. chr
+#2. position (0 based)
+#3. ref allele
+#4. second allele
+#5. overall quality
+#foreach individual (6-9, 10-13, ...)
+#a. count of allele in 3
+#b. count of allele in 4
+#c. genotype call (-1, or count of ref allele)
+#d. quality of genotype call (quality of non-ref allele from masterVar)
+
+#with no arguments at all, print a usage summary and stop
+#the option names shown here must match the patterns in the option loop below
+if (!@ARGV) {
+   print "usage: gd_snp2vcf.pl file.gd_snp[.gz|.bz2] -geno=8[,12:16,20...] -handle=HANDLE -batch=BATCHNAME -ref=REFERENCEID [-bioproj=XYZ -biosamp=ABC -population=POPID[,POPID2...] -chrCol=9 -posCol=9 -off=N -metaOut=FILE ] > snpsForSubmission.vcf\n";
+   exit;
+}
+
+#the first argument is the input file; everything after it is an option
+my $in = shift @ARGV;
+my $genoCols = '';
+my $handle;
+my $batch;
+my $bioproj;
+my $biosamp;
+my $ref;
+my $pop;
+#$cr and $cp are 0-based indexes of the chromosome and position columns
+#(defaults: first and second column)
+my $cr = 0; #allow to use alternate reference?
+my $cp = 1;
+my $meta;
+my $offset = 0; #offset for genotype column, gd_snp vs gd_genotype indivs file
+#the patterns below are not anchored, and the first one that matches wins
+#-geno= followed by digits/commas is taken as a literal column list; any other
+#-geno= value is treated as a comma separated list of files for readGeno
+#readGeno applies $offset as soon as it is called, so -off= only takes effect
+#when it appears before the -geno= option on the command line
+foreach (@ARGV) {
+   if (/-geno=([0-9,]+)/) { $genoCols .= "$1:"; }
+   elsif (/-geno=(.*)/) { $genoCols .= readGeno($1); }
+   elsif (/-off=([0-9])/) { $offset = $1; }
+   elsif (/-handle=(.*)/) { $handle = $1; }
+   elsif (/-batch=(.*)/) { $batch = $1; }
+   elsif (/-bioproj=(.*)/) { $bioproj = $1; }
+   elsif (/-biosamp=(.*)/) { $biosamp = $1; }
+   elsif (/-ref=(.*)/) { $ref = $1; }
+   elsif (/-population=(\S+)/) { $pop = $1; }
+   elsif (/-chrCol=(\d+)/) { $cr = $1 - 1; }
+   elsif (/-posCol=(\d+)/) { $cp = $1 - 1; }
+   elsif (/-metaOut=(.*)/) { $meta = $1; }
+}
+#a column given as 0 ends up as -1 here
+if ($cr < 0 or $cp < 0) { die "ERROR the column numbers should be 1 based.\n"; }
+
+#remove trailing delimiters
+$genoCols =~ s/,:/:/g;
+$genoCols =~ s/[,:]$//;
+
+#flat list of every genotype column; ':' separates populations in $genoCols
+my @gnc = split(/,|:/, $genoCols);
+
+#open the input, decompressing on the fly when the name ends in .gz or .bz2
+#the dot is escaped so that only a real extension matches
+#the list form of the pipe open runs the decompressor without a shell, so a
+#file name containing spaces or shell metacharacters is passed through intact
+if ($in =~ /\.gz$/) {
+   open(FH, '-|', 'zcat', $in) or die "Couldn't open $in, $!\n";
+}elsif ($in =~ /\.bz2$/) {
+   open(FH, '-|', 'bzcat', $in) or die "Couldn't open $in, $!\n";
+}else {
+   open(FH, $in) or die "Couldn't open $in, $!\n";
+}
+#print the meta-information lines followed by the column header line
+my @head = prepHeader();
+if (@head) {
+   print join("\n", @head), "\n";
+   #now column headers
+   print "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO";
+   if (defined $pop) {
+      #one tab separated column per population id
+      $pop =~ s/,$//;
+      my $t = $pop;
+      $t =~ s/,/\t/g;
+      print "\tFORMAT\t$t";
+   }
+   print "\n";
+}
+#convert each data row; comment lines and blank lines are skipped
+while (<FH>) {
+   chomp;
+   if (/^#/) { next; }
+   if (/^\s*$/) { next; }
+   my @f = split(/\t/);
+   #vcf columns: chrom pos id ref alt qual filter info
+   # info must have VRT=[0-9] 1==SNV 2=indel 6=NoVariation 8=MNV ...
+   my $vrt = 1;
+   if ($f[2] !~ /^[ACTG]$/ or $f[3] !~ /^[ACTG]$/) {
+      die "Sorry this can only do SNV's at this time\n";
+   }
+   if (scalar @gnc == 1) { #single genotype column
+      if (!defined $f[4] or $f[4] == -1) { $f[4] = '.'; }
+      if ($f[$gnc[0]-1] == 2) { $vrt = 6; } #reference match
+      print "$f[$cr]\t$f[$cp]\t$f[$cr];$f[$cp]\t$f[2]\t$f[3]\t$f[4]\t.\tVRT=$vrt\n";
+      #TODO? put read counts in comment?
+   }elsif ($pop) { #do as population
+      my @cols;
+      foreach my $gp (split(/:/,$genoCols)) { #foreach population
+         my @g = split(/,/, $gp);
+         my $totChrom = 2*(scalar @g);
+         my $totRef = 0;
+         foreach my $i (@g) { if (!defined $f[$i-1] or $f[$i-1] == -1) { next; } $totRef += $f[$i-1]; }
+         #NOTE(review): $vrt is not reset per population, so one all-reference
+         #population marks the whole row VRT=6 -- confirm this is intended
+         if ($totChrom == $totRef) { $vrt = 6; }
+         if ($totRef > $totChrom) { die "ERROR likely the wrong column was chosen for genotype\n"; }
+         #NOTE(review): individuals with a missing genotype are still counted
+         #in $totChrom, so they add to the alternate count -- confirm
+         my $altCnt = $totChrom - $totRef;
+         push(@cols, "$totChrom:$altCnt");
+      }
+      print "$f[$cr]\t$f[$cp]\t$f[$cr];$f[$cp]\t$f[2]\t$f[3]\t$f[4]\t.\tVRT=$vrt\tNA:AC\t", join("\t", @cols), "\n";
+   }else { #leave allele counts off
+      my $totChrom = 2*(scalar @gnc);
+      my $totRef = 0;
+      #skip missing columns as well as -1, the same way the population branch
+      #does, so a short row is not summed as an undefined value
+      foreach my $i (@gnc) { if (!defined $f[$i-1] or $f[$i-1] == -1) { next; } $totRef += $f[$i-1]; }
+      if ($totChrom == $totRef) { $vrt = 6; }
+      print "$f[$cr]\t$f[$cp]\t$f[$cr];$f[$cp]\t$f[2]\t$f[3]\t$f[4]\t.\tVRT=$vrt\n";
+   }
+}
+close FH or die "Couldn't close $in, $!\n";
+
+#when -metaOut was given, write a metadata skeleton to that file
+#only HANDLE, BATCH and the population IDs are filled in from the options;
+#every other field is written empty
+if ($meta) {
+   open(FH, ">", $meta) or die "Couldn't open $meta, $!\n";
+   print FH "TYPE: CONT\n",
+            "HANDLE: $handle\n",
+            "NAME: \n",
+            "FAX: \n",
+            "TEL: \n",
+            "EMAIL: \n",
+            "LAB: \n",
+            "INST: \n",
+            "ADDR: \n",
+            "||\n",
+            "TYPE: METHOD\n",
+            "HANDLE: $handle\n",
+            "ID: \n",
+            "METHOD_CLASS: Sequence\n",
+            "TEMPLATE_TYPE: \n",
+            "METHOD:\n",
+            "||\n";
+   #one POPULATION section per comma separated population id
+   if ($pop) {
+      my @p = split(/,/, $pop);
+      foreach my $t (@p) {
+         print FH
+            "TYPE: POPULATION\n",
+            "HANDLE: $handle\n",
+            "ID: $t\n",
+            "POPULATION: \n",
+            "||\n";
+      }
+   }
+   print FH "TYPE: SNPASSAY\n",
+            "HANDLE: $handle\n",
+            "BATCH: $batch\n",
+            "MOLTYPE: \n",
+            "METHOD: \n",
+            "ORGANISM: \n",
+            "||\n",
+            "TYPE: SNPPOPUSE | SNPINDUSE\n",
+            "HANDLE: $handle\n",
+            "BATCH: \n",
+            "METHOD: \n",
+            "||\n";
+
+   close FH or die "Couldn't close $meta, $!\n";
+}
+
+exit 0;
+
+#parse old header and add or create new
+sub prepHeader {
+   #returns the list of VCF meta-information lines (no trailing newlines),
+   #built from the file-level option variables $handle, $batch, $bioproj,
+   #$biosamp, $ref and $pop
+   my @h;
+   push(@h, '##fileformat=VCFv4.1');
+   my ($day, $mo, $yr) = (localtime)[3,4,5];
+   #localtime months are 0 based and years are counted from 1900
+   #zero-pad month and day so the date is always the 8 digit YYYYMMDD form
+   #(unpadded, 2013-09-19 came out as the ambiguous 2013919)
+   push(@h, '##fileDate=' . sprintf("%04d%02d%02d", $yr + 1900, $mo + 1, $day));
+   push(@h, "##handle=$handle");
+   push(@h, "##batch=$batch");
+   if ($bioproj) { push(@h, "##bioproject_id=$bioproj"); }
+   if ($biosamp) { push(@h, "##biosample_id=$biosamp"); }
+   push(@h, "##reference=$ref");  ##reference=GCF_999999.99
+   #'##INFO=<ID=LID, Number=1,Type=string, Description="Unique local variation ID or name for display. The LID provided here combined with the handle must be unique for a particular submitter.">'
+   push(@h, '##INFO=<ID=VRT,Number=1,Type=Integer,Description="Variation type,1 - SNV: single nucleotide variation,2 - DIV: deletion/insertion variation,3 - HETEROZYGOUS: variable, but undefined at nucleotide level,4 - STR: short tandem repeat (microsatellite) variation, 5 - NAMED: insertion/deletion variation of named repetitive element,6 - NO VARIATON: sequence scanned for variation, but none observed,7 - MIXED: cluster contains submissions from 2 or more allelic classes (not used) ,8 - MNV: multiple nucleotide variation with all eles of common length greater than 1,9 - Exception">');
+   #sometimes have allele freqs?
+   if (defined $pop) {
+      #the NA line now carries the closing '>' it was missing
+      push(@h, '##FORMAT=<ID=NA,Number=1,Type=Integer,Description="Number of alleles for the population.">');
+      push(@h, '##FORMAT=<ID=AC,Number=.,Type=Integer,Description="Allele count for each alternate allele.">');
+      foreach my $t (split(/,/, $pop)) {
+         push(@h, "##population_id=$t");
+      }
+   }
+   #PMID?
+   ##INFO=<ID=PMID,Number=.,Type=Integer,Description="PubMed ID linked to variation if available.">
+
+   return @h;
+}
+####End
+
+#read genotype columns from a file
+sub readGeno {
+   #takes a comma separated list of file names; the first whitespace separated
+   #field of every line must be an integer column number
+   #returns the columns (each increased by the file-level $offset) joined with
+   #',' within a file and ':' between files
+   my $list = shift @_;
+   my @files = split(/,/, $list);
+   my $cols='';
+   foreach my $file (@files) {
+      #lexical handle and loop variable: this sub is called from inside the
+      #option loop, so it must not touch the global FH or the caller's $_
+      open(my $fh, '<', $file) or die "Couldn't read $file, $!\n";
+      while (my $line = <$fh>) {
+         chomp $line;
+         #a blank line used to add a bogus column equal to the offset
+         if ($line =~ /^\s*$/) { next; }
+         #split(' ') ignores leading whitespace, so the column is always $f[0]
+         my @f = split(' ', $line);
+         if ($f[0] =~/\D/) { die "ERROR expect an integer for the column\n"; }
+         $f[0] += $offset;
+         $cols .= "$f[0],";
+      }
+      close $fh;
+      $cols .= ":";
+   }
+   $cols =~ s/,:$//;
+   return $cols;
+}
+####End
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/gd_snp2vcf.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,154 @@
+<tool id="gd_snp2vcf" name="gd_snp to VCF" version="1.0.0" force_history_refresh="True">
+  <description>: Convert from gd_snp to VCF format, for submission to dbSNP</description>
+
+  <command interpreter="perl">
+    gd_snp2vcf.pl "$input" -handle=$hand -batch=$batch -ref=$ref -metaOut=$output2
+    #if $individuals.choice == '0':
+      #set $geno = ''
+      #for $individual_col in $input.dataset.metadata.individual_columns
+        ##need to check to number of cols per individual
+        #if $input.ext == "gd_snp":
+           #set $t = $individual_col + 2
+        #else if $input.ext == "gd_genotype":
+           #set $t = $individual_col
+        #else:
+           #set $t = $individual_col
+        #end if
+        #set $geno += "%d," % ($t)
+      #end for
+      #if $individuals.pall_id != '':
+        -population=$individuals.pall_id
+      #end if
+    #else if $individuals.choice == '1':
+      #set $geno = ''
+      #set $pop = ''
+      #if $input.ext == "gd_snp":
+         -off=2
+      #else if $input.ext == "gd_genotype":
+         -off=0
+      #else:
+         -off=2
+      #end if
+      #for $population in $individuals.populations
+        #set $geno += "%s," % ($population.p1_input)
+        #set $pop += "%s," % ($population.p1_id)
+      #end for
+      -population=$pop
+    #else if $individuals.choice == '2':
+      #set $geno = $individuals.geno
+    #end if
+    -geno=$geno
+    #if $bioproj.value != '':
+       -bioproj=$bioproj
+    #end if
+    #if $biosamp.value != '':
+       -biosamp=$biosamp
+    #end if
+    > $output
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="gd_snp,gd_genotype" label="SNP dataset" />
+    <conditional name="individuals">
+      <param name="choice" type="select" label="Generate dataset for">
+        <option value="0" selected="true">All individuals</option>
+        <option value="1">Individuals in populations</option>
+        <option value="2">A single individual</option>
+      </param>
+      <when value="0">
+        <param name="pall_id" type="text" size="20" label="ID for this population" help="Leaving this blank will omit allele counts from the output" />
+      </when>
+      <when value="1">
+        <repeat name="populations" title="Population" min="1">
+        <param name="p1_input" type="data" format="gd_indivs" label="Population individuals" />
+        <param name="p1_id" type="text" size="20" label="ID for this population" help="Leaving this blank will omit allele counts from the output" />
+        </repeat>
+      </when>
+      <when value="2">
+        <param name="geno" type="data_column" data_ref="input" label="Column containing genotype" value="8" />
+      </when>
+    </conditional>
+    <param name="hand" type="text" size="20" label="dbSNP handle" help="If you do not have a handle, request one at http://www.ncbi.nlm.nih.gov/projects/SNP/handle.html" />
+    <param name="batch" type="text" size="20" label="Batch ID" help="ID used to tie dbSNP metadata to the VCF submission" />
+    <param name="ref" type="text" size="20" label="Reference sequence ID" help="The RefSeq assembly accession.version on which the SNP positions are based (see http://www.ncbi.nlm.nih.gov/assembly/)" />
+    <param name="bioproj" type="text" size="20" label="Optional: Registered BioProject ID" />
+    <param name="biosamp" type="text" size="20" label="Optional: Comma-separated list of registered BioSample IDs" />
+  </inputs>
+
+  <outputs>
+    <data name="output" format="vcf" />
+    <data name="output2" format="text" />
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="input" value="sample.gd_snp" ftype="gd_snp" />
+      <param name="choice" value="2" />
+      <param name="geno" value="11" />
+      <param name="hand" value="MyHandle" />
+      <param name="batch" value="Test1" />
+      <param name="ref" value="pb_000001.1" />
+      <output name="output" file="snpsForSubmission.vcf" ftype="vcf" compare="diff" />
+      <output name="output2" file="snpsForSubmission.text" ftype="text" compare="diff" />
+    </test>
+  </tests>
+
+  <help>
+
+**Dataset formats**
+
+The input dataset is in gd_snp_ format.
+The output consists of two datasets needed for submitting SNPs:
+a VCF_ file in the specific format required by dbSNP, and a partially
+completed text_ file for the associated dbSNP metadata.
+(`Dataset missing?`_)
+
+.. _gd_snp: ./static/formatHelp.html#gd_snp
+.. _VCF: ./static/formatHelp.html#vcf
+.. _text: ./static/formatHelp.html#text
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+This tool converts a dataset in gd_snp format to a VCF file formatted
+for submission to the dbSNP database at NCBI.  It also creates a partially
+filled-in template to assist you in preparing the required "metadata" file
+describing the SNP submission.
+
+-----
+
+**Example**
+
+- input::
+
+    #{"column_names":["scaf","pos","A","B","qual","ref","rpos","rnuc","1A","1B","1G","1Q","2A","2B","2G","2Q","3A","3B","3G","3Q","4A","4B","4G","4Q","5A","5B","5G","5Q","6A","6B","6G","6Q","pair","dist",
+    #"prim","rflp"],"dbkey":"canFam2","individuals":[["PB1",9],["PB2",13],["PB3",17],["PB4",21],["PB6",25],["PB8",29]],"pos":2,"rPos":7,"ref":6,"scaffold":1,"species":"bear"}
+    Contig161  115    C      T      73.5    chr1    4641382   C      6      0      2      45     8      0      2      51     15     0      2      72     5      0      2      42     6      0      2      45     10     0      2      57     Y      54     0.323   0
+    Contig48   11     A      G      94.3    chr1    10150264  A      1      0      2      30     1      0      2      30     1      0      2      30     3      0      2      36     1      0      2      30     1      0      2      30     Y      22     +99.    0
+    Contig20   66     C      T      54.0    chr1    21313534  C      4      0      2      39     4      0      2      39     5      0      2      42     4      0      2      39     4      0      2      39     5      0      2      42     N      1      +99.    0
+    etc.
+
+- VCF output (for all individuals, and giving a population ID)::
+
+    #CHROM     POS    ID               REF    ALT    QUAL    FILTER  INFO    FORMAT  PB
+    Contig161  115    Contig161;115    C      T      73.5    .       VRT=6   NA:AC   8:0
+    Contig48   11     Contig48;11      A      G      94.3    .       VRT=6   NA:AC   8:0
+    Contig20   66     Contig20;66      C      T      54.0    .       VRT=6   NA:AC   8:0
+    etc.
+
+Note:  This excerpt from the output does not show all of the headers.  Also,
+if the population ID had not been given, then the last two columns would not
+appear in the output.
+
+-----
+
+**Reference**
+
+Sherry ST, Ward MH, Kholodov M, Baker J, Phan L, Smigielski EM, Sirotkin K.
+dbSNP: the NCBI database of genetic variation. Nucleic Acids Res. 2001
+Jan 1;29(1):308-11.
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/gd_util.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+
+import base64
+import errno
+import os
+import subprocess
+import sys
+import zlib
+
+################################################################################
+
+def die(message):
+    """Print message to standard error and exit the process with status 1."""
+    print >> sys.stderr, message
+    sys.exit(1)
+
+################################################################################
+
+def mkdir_p(path):
+    try:
+        os.makedirs(path)
+    except OSError, e:
+        if e.errno <> errno.EEXIST:
+            raise
+
+################################################################################
+
+def run_program(prog, args, stdout=subprocess.PIPE, stderr=subprocess.PIPE):
+    kwargs = {
+        "bufsize": -1,
+        "close_fds": False,
+        "creationflags": 0,
+        "cwd": None,
+        "env": None,
+        "executable": prog,
+        "preexec_fn": None,
+        "shell": False,
+        "startupinfo": None,
+        "stderr": stderr,
+        "stdin": None,
+        "stdout": stdout,
+        "universal_newlines": False
+    }
+
+    str_args = [str(x) for x in args]
+
+    p = subprocess.Popen(str_args, **kwargs)
+    (stdoutdata, stderrdata) = p.communicate()
+    rc = p.returncode
+
+    if rc != 0:
+        die('FAILED:\n{0}\nCOMMAND:\n{1}'.format(stderrdata, ' '.join(str_args)))
+
+    return stdoutdata, stderrdata
+
+################################################################################
+
+def unwrap_string(string):
+    try:
+        decoded_string = base64.b64decode(string)
+    except TypeError, message:
+        die('base64.b64decode: {0}: {1}'.format(message, string))
+
+    try:
+        return zlib.decompress(decoded_string)
+    except zlib.error, message:
+        die('zlib.decompress: {0}'.format(message))
+
+################################################################################
+
+def wrap_string(string, level=9):
+    try:
+        compressed_string = zlib.compress(string, level)
+    except zlib.error, message:
+        die('zlib.compress: {0}'.format(message))
+    return base64.b64encode(compressed_string)
+
+################################################################################
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/genome_diversity.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,266 @@
+#!/usr/bin/env python
+
+import sys
+import cdblib
+
+def _openfile( filename=None, mode='r' ):
+    try:
+        fh = open( filename, mode )
+    except IOError, err:
+        raise RuntimeError( "can't open file: %s\n" % str( err ) )
+    return fh
+
+def get_filename_from_loc( species=None, filename=None ):
+    fh = _openfile( filename )
+    for line in fh:
+        if line and not line.startswith( '#' ):
+            line = line.rstrip( '\r\n' )
+            if line:
+                elems = line.split( '\t' )
+                if len( elems ) >= 2 and elems[0] == species:
+                    return elems[1]
+
+    raise RuntimeError( "can't find '%s' in location file: %s\n" % ( species, filename ) )
+
+
+class SnpFile( object ):
+    def __init__( self, filename=None, seq_col=1, pos_col=2, ref_seq_col=7, ref_pos_col=8 ):
+        self.filename = filename
+        self.fh = _openfile( filename )
+        self.seq_col = seq_col
+        self.pos_col = pos_col
+        self.ref_seq_col = ref_seq_col
+        self.ref_pos_col = ref_pos_col
+        self.elems = None
+        self.line = None
+        self.comments = []
+
+    def next( self ):
+        while self.fh:
+            try:
+                self.line = self.fh.next()
+            except StopIteration:
+                self.line = None
+                self.elems = None
+                return None
+            if self.line:
+                self.line = self.line.rstrip( '\r\n' )
+                if self.line:
+                    if self.line.startswith( '#' ):
+                        self.comments.append( self.line )
+                    else:
+                        self.elems = self.line.split( '\t' )
+                        return 1
+
+    def get_seq_pos( self ):
+        if self.elems:
+            return self.elems[ self.seq_col - 1 ], self.elems[ self.pos_col - 1 ]
+        else:
+            return None, None
+
+    def get_ref_seq_pos( self ):
+        if self.elems:
+            return self.elems[ self.ref_seq_seq - 1 ], self.elems[ self.ref_pos_col - 1 ]
+        else:
+            return None, None
+
+
+class IndexedFile( object ):
+
+    def __init__( self, data_file=None, index_file=None ):
+        self.data_file = data_file
+        self.index_file = index_file
+        self.data_fh = _openfile( data_file )
+        self.index_fh = _openfile( index_file )
+        self._reader = cdblib.Reader( self.index_fh.read(), hash )
+
+    def get_indexed_line( self, key=None ):
+        line = None
+        if key in self._reader:
+            offset = self._reader.getint( key )
+            self.data_fh.seek( offset )
+            try:
+                line = self.data_fh.next()
+            except StopIteration:
+                raise RuntimeError( 'index file out of sync for %s' % key )
+        return line
+
+class PrimersFile( IndexedFile ):
+    def get_primer_header( self, sequence=None, position=None ):
+        key = "%s %s" % ( str( sequence ), str( position ) )
+        header = self.get_indexed_line( key )
+        if header:
+            if header.startswith( '>' ):
+                elems = header.split()
+                if len( elems ) < 3:
+                    raise RuntimeError( 'short primers header for %s' % key )
+                if sequence != elems[1] or str( position ) != elems[2]:
+                    raise RuntimeError( 'primers index for %s finds %s %s' % ( key, elems[1], elems[2] ) )
+            else:
+                raise RuntimeError( 'primers index out of sync for %s' % key )
+        return header
+
+    def get_entry( self, sequence=None, position=None ):
+        entry = self.get_primer_header( sequence, position )
+        if entry:
+            while self.data_fh:
+                try:
+                    line = self.data_fh.next()
+                except StopIteration:
+                    break
+                if line.startswith( '>' ):
+                    break
+                entry += line
+        return entry
+
+    def get_enzymes( self, sequence=None, position=None ):
+        entry = self.get_primer_header( sequence, position )
+        enzyme_list = []
+        if entry:
+            try:
+                line = self.data_fh.next()
+            except StopIteration:
+                raise RuntimeError( 'primers entry for %s %s is truncated' % ( str( sequence ), str( position ) ) )
+            if line.startswith( '>' ):
+                raise RuntimeError( 'primers entry for %s %s is truncated' % ( str( sequence ), str( position ) ) )
+            line.rstrip( '\r\n' )
+            if line:
+                enzymes = line.split( ',' )
+                for enzyme in enzymes:
+                    enzyme = enzyme.strip()
+                    if enzyme:
+                        enzyme_list.append( enzyme )
+        return enzyme_list
+
+class SnpcallsFile( IndexedFile ):
+    def get_snp_seq( self, sequence=None, position=None ):
+        key = "%s %s" % ( str( sequence ), str( position ) )
+        line = self.get_indexed_line( key )
+        if line:
+            elems = line.split( '\t' )
+            if len (elems) < 3:
+                raise RuntimeError( 'short snpcalls line for %s' % key )
+            if sequence != elems[0] or str( position ) != elems[1]:
+                raise RuntimeError( 'snpcalls index for %s finds %s %s' % ( key, elems[0], elems[1] ) )
+            return elems[2]
+        else:
+            return None
+
+    def get_flanking_dna( self, sequence=None, position=None, format='fasta' ):
+        if format != 'fasta' and format != 'primer3':
+            raise RuntimeError( 'invalid format for flanking dna: %s' % str( format ) )
+        seq = self.get_snp_seq( sequence, position )
+        if seq:
+            p = seq.find('[')
+            if p == -1:
+                raise RuntimeError( 'snpcalls entry for %s %s missing left bracket: %s' % ( str( sequence ), str( position ), seq ) )
+            q = seq.find(']', p + 1)
+            if q == -1:
+                raise RuntimeError( 'snpcalls entry for %s %s missing right bracket: %s' % ( str( sequence ), str( position ), seq ) )
+            q += 1
+
+            if format == 'fasta':
+                flanking_seq = '> '
+            else:
+                flanking_seq = 'SEQUENCE_ID='
+
+            flanking_seq += "%s %s %s %s\n" % ( str( sequence ), str( position ), seq[p+1], seq[p+3] )
+
+            if format == 'primer3':
+                flanking_seq += 'SEQUENCE_TEMPLATE='
+
+            flanking_seq += "%sn%s\n" % ( seq[0:p], seq[q:] )
+
+            if format == 'primer3':
+                flanking_seq += "SEQUENCE_TARGET=%d,11\n=\n" % ( p - 5 )
+
+            return flanking_seq
+        else:
+            return None
+
+
+
+class LocationFile( object ):
+    def __init__(self, filename):
+        self.build_map(filename)
+
+    def build_map(self, filename):
+        self.map = {}
+        self.open_file(filename)
+        for line in self.read_lines():
+            elems = line.split('\t', 1)
+            if len(elems) == 2:
+                self.map[ elems[0].strip() ] = elems[1].strip()
+        self.close_file()
+
+    def read_lines(self):
+        for line in self.fh:
+            if not line.startswith('#'):
+                line = line.rstrip('\r\n')
+                yield line
+
+    def open_file(self, filename):
+        self.filename = filename
+        try:
+            self.fh = open(filename, 'r')
+        except IOError, err:
+            print >> sys.stderr, "Error opening location file '%s': %s" % (filename, str(err))
+            sys.exit(1)
+
+    def close_file(self):
+        self.fh.close()
+
+    def loc_file( self, key ):
+        if key in self.map:
+            return self.map[key]
+        else:
+            print >> sys.stderr, "'%s' does not appear in location file '%s'" % (key, self.filename)
+            sys.exit(1)
+
+class ChrLens( object ):
+    def __init__( self, chrlen_filename ):
+        self.chrlen_filename = chrlen_filename
+        self.build_map()
+
+    def build_map(self):
+        self.map = {}
+        self.open_file(self.chrlen_filename)
+        for line in self.read_lines():
+            elems = line.split('\t', 1)
+            if len(elems) == 2:
+                chrom = elems[0].strip()
+                chrom_len_text = elems[1].strip()
+                try:
+                    chrom_len = int( chrom_len_text )
+                except ValueError:
+                    print >> sys.stderr, "Bad length '%s' for chromosome '%s' in '%s'" % (chrom_len_text, chrom, self.chrlen_filename)
+                self.map[ chrom ] = chrom_len
+        self.close_file()
+
+    def read_lines(self):
+        for line in self.fh:
+            if not line.startswith('#'):
+                line = line.rstrip('\r\n')
+                yield line
+
+    def open_file(self, filename):
+        self.filename = filename
+        try:
+            self.fh = open(filename, 'r')
+        except IOError, err:
+            print >> sys.stderr, "Error opening chromosome length file '%s': %s" % (filename, str(err))
+            sys.exit(1)
+
+    def close_file(self):
+        self.fh.close()
+
+    def length( self, key ):
+        if key in self.map:
+            return self.map[key]
+        else:
+            return None
+
+    def __iter__( self ):
+        for chrom in self.map:
+            yield chrom
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/inbreeding_and_kinship.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+
+import sys
+import gd_util
+
+################################################################################
+
+if len(sys.argv) != 6:
+    gd_util.die('Usage')
+
+ped_input, ind_input, computed_value, output, kinship_input = sys.argv[1:]
+
+################################################################################
+
+prog = 'inbreed'
+
+args = [ prog ]
+args.append(ped_input)      # pedigree
+args.append(ind_input)      # specified individuals (e.g.,,potential breeding population)
+args.append(kinship_input)  # kinships of founders
+args.append(computed_value) # 0 = inbreedng coefficients, 1 = kinships, 2 = mean kinships
+
+with open(output, 'w') as fh:
+    gd_util.run_program(prog, args, stdout=fh)
+
+################################################################################
+
+sys.exit(0)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/inbreeding_and_kinship.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,133 @@
+<tool id="gd_inbreeding_and_kinship" name="Inbreeding and kinship" version="1.0.0">
+  <description>: Analyze the pedigree without genomic data</description>
+
+  <command interpreter="python">
+    inbreeding_and_kinship.py '$ped_input' '$ind_input' '$computed_value' '$output'
+    #if $kinship_dataset.choice == '0'
+      '/dev/null'
+    #else if $kinship_dataset.choice == '1'
+      '$kinship_input'
+    #end if
+  </command>
+
+  <inputs>
+    <param name="ped_input" type="data" format="txt" label="Pedigree dataset" />
+    <param name="ind_input" type="data" format="txt" label="Individuals dataset" />
+    <conditional name="kinship_dataset">
+      <param name="choice" type="select" format="integer" label="Kinship dataset">
+        <option value="0" selected="true">no kinship dataset</option>
+        <option value="1">select kinship dataset</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <param name="kinship_input" type="data" format="txt" label="Kinship dataset" />
+      </when>
+    </conditional>
+    <param name="computed_value" type="select" format="integer" label="Computed value">
+      <option value="0" selected="true">inbreeding coefficients</option>
+      <option value="1">kinships</option>
+      <option value="2">mean kinships</option>
+    </param>
+  </inputs>
+
+  <outputs>
+    <data name="output" format="txt" />
+  </outputs>
+
+  <requirements>
+    <requirement type="package" version="0.1">gd_c_tools</requirement>
+  </requirements>
+
+  <!--
+  <tests>
+  </tests>
+  -->
+
+  <help>
+
+**Dataset formats**
+
+The input datasets are in text_ format.
+The output dataset is in text_ format.
+
+.. _text: ./static/formatHelp.html#text
+
+-----
+
+**What it does**
+
+The user specifies a pedigree.  This is done with a Galaxy table with one
+row per individual, containing (1) the individual's name, (2) the name of
+one of the individual's parents, which must have occurred at the start
+of a previous line, and (3) the name of the individual's other parent,
+which occurred at the start of a previous line.  For a pedigree founder,
+each parent name is replaced by &quot;-&quot;.
+
+The user also provides a file that specifies a set of names of individuals
+(specifically, the first word on each line, with one line per individual);
+any subsequent information on a line is ignored.
+
+The user can optionally provide a file giving kinship information for
+each pair of distinct individuals from the founder set.
+
+Finally the user picks from among the options:
+
+  1. inbreeding coefficients for each specified individual
+  2. the kinship for each pair of distinct specified individuals
+  3. the mean kinship for each specified individual, i.e., the average kinship value for that individual and every specified individual
+
+The command reports the requested values.
+
+-----
+
+**Example**
+
+- input::
+
+   A - -
+   B - -
+   C - -
+   D - -
+   E - -
+   F A B
+   G A B
+   Thelma A F
+   Louise F G
+
+Rows can have more than three columns (such as the individual's sex),
+but only the first three columns affect this command.
+
+Suppose on the other hand that we select an alternative
+&quot;founder&quot; set, {A, F, G}.  (We require a founder set to have a
+member on any ancestral path from Thelma or Louise.)  The above pedigree
+file is then replaced by::
+
+   A - -
+   F - -
+   G - -
+   Thelma A F
+   Louise F G
+
+The user then also provides a file giving kinship information for each
+pair of distinct individuals from the founder set; for the current
+example, the kinship file is as follows::
+
+   A F 0.25
+   A G 0.25
+   F G 0.25
+
+since parent-child pairs and siblings both have kinship 0.25.  The
+advantage is that this capability can be used in cases where the kinships
+of the founders are not initially known, but instead are computationally
+predicted, e.g., with the Galaxy &quot;Discover&quot; tool.
+  </help>
+</tool>
+
+
+
+
+
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/lib/galaxy/datatypes/wsf.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,189 @@
+"""
+SnpFile datatype
+"""
+
+import galaxy.datatypes.data
+import tempfile
+import os
+import simplejson
+from galaxy import util
+from galaxy.datatypes.sniff import *
+from galaxy.datatypes.tabular import Tabular
+from galaxy.datatypes.images import Html
+from galaxy.datatypes import metadata
+from galaxy.datatypes.metadata import MetadataElement
+
+class Wped( Html ):
+    # Composite datatype with extension 'gd_ped' made of a .ped and a .map file.
+    allow_datatype_change = False
+    composite_type = 'basic'
+    file_ext = 'gd_ped'
+
+    # base_name is the metadata value named in substitute_name_with_metadata
+    # below; presumably it replaces the '%s' in the composite file names --
+    # confirm against Galaxy's add_composite_file.
+    MetadataElement( name="base_name", desc="base name for all transformed versions of this genetic dataset", default='WpedData', readonly=True, set_in_upload=True )
+
+    def __init__( self, **kwd ):
+        Html.__init__( self, **kwd )
+        # Register the two member files; both are declared non-binary.
+        self.add_composite_file( '%s.ped', description = 'Pedigree File', substitute_name_with_metadata = 'base_name', is_binary = False )
+        self.add_composite_file( '%s.map', description = 'Map File', substitute_name_with_metadata = 'base_name', is_binary = False )
+
+class Individuals( Tabular ):
+    # Tabular datatype with extension 'gd_indivs' and three fixed column
+    # headings used when rendering the peek.
+    file_ext = 'gd_indivs'
+    def __init__(self, **kwd):
+        Tabular.__init__( self, **kwd )
+        self.column_names = [ 'Column', 'Name', 'Alias' ]
+
+    def display_peek( self, dataset ):
+        """Return the peek as an HTML table headed by self.column_names."""
+        return Tabular.make_html_table( self, dataset, column_names=self.column_names )
+
+class DatasetComments( object ):
+    def __init__( self, dataset, comment_string='#' ):
+        self.dataset = dataset
+        self.comment_string = comment_string
+        self.comment_string_len = len(comment_string)
+        self._comments = []
+        self._read_comments()
+
+    def _read_comments( self ):
+        if self.dataset.has_data():
+            try:
+                for line in open(self.dataset.file_name, 'rU'):
+                    if line.startswith(self.comment_string):
+                        comment = line[self.comment_string_len:]
+                        self._comments.append(comment)
+                    else:
+                        break
+            except:
+                pass
+
+    def __str__( self ):
+        return "".join(self._comments)
+
+    @property
+    def comments( self ):
+        return self._comments
+
+class DatasetCommentMetadata( object ):
+    def __init__( self, dataset, comment_string='#' ):
+        self.dataset_comments = DatasetComments( dataset, comment_string )
+        self._comment_metadata = {}
+        self._decode_dataset_comments()
+
+    def _decode_dataset_comments( self ):
+        dataset_comment_string = str( self.dataset_comments )
+        try:
+            self._comment_metadata = simplejson.loads( dataset_comment_string )
+        except simplejson.JSONDecodeError as e:
+            pass
+
+    @property
+    def comment_metadata( self ):
+        return self._comment_metadata
+
+class AnnotatedTabular( Tabular ):
+    """ Tabular file with optional comment block containing JSON to be imported into metadata """
+    MetadataElement( name="comment_metadata", desc="comment metadata", param=metadata.DictParameter, visible=False, readonly=True )
+
+    def set_meta( self, dataset, overwrite = True, **kwd ):
+        """Run Tabular.set_meta, then import the comment-block JSON once.
+
+        The comment block is decoded only while comment_metadata is still
+        None; a copy of the decoded dict is stored and the
+        set_dataset_metadata_from_comments hook is then called.
+        """
+        Tabular.set_meta( self, dataset, overwrite=overwrite, max_data_lines=None, max_guess_type_data_lines=1000, **kwd )
+        if dataset.metadata.comment_metadata is None:
+            dataset_comment_metadata = DatasetCommentMetadata( dataset )
+            dataset.metadata.comment_metadata = dataset_comment_metadata.comment_metadata.copy()
+            self.set_dataset_metadata_from_comments( dataset )
+
+    def set_dataset_metadata_from_comments( self, dataset ):
+        """Hook for subclasses; the base implementation does nothing."""
+        pass
+
+    def set_peek( self, dataset, line_count=None, is_multi_byte=False ):
+        # super(Tabular, self) starts the lookup *after* Tabular in the MRO,
+        # so Tabular's own set_peek is bypassed here.
+        # NOTE(review): presumably intentional so that WIDTH and skipchars
+        # can be passed to the parent implementation -- confirm.
+        super(Tabular, self).set_peek( dataset, line_count=line_count, is_multi_byte=is_multi_byte, WIDTH='unlimited', skipchars=['#'] )
+
+    def display_peek( self, dataset ):
+        """Returns formated html of peek"""
+        return Tabular.make_html_table( self, dataset, skipchars=['#'] )
+
+class Fake( AnnotatedTabular ):
+    MetadataElement( name="scaffold", desc="scaffold column", param=metadata.ColumnParameter, default=0 )
+    MetadataElement( name="pos", desc="pos column", param=metadata.ColumnParameter, default=0 )
+    MetadataElement( name="ref", desc="ref column", param=metadata.ColumnParameter, default=0 )
+    MetadataElement( name="rPos", desc="rPos column", param=metadata.ColumnParameter, default=0 )
+    MetadataElement( name="species", desc="species", default='', no_value='', visible=False, readonly=True )
+
+    def set_dataset_metadata_from_comments( self, dataset ):
+        self.set_dataset_column_names_metadata( dataset )
+        self.set_dataset_columnParameter_metadata( dataset )
+        self.set_dataset_species_metadata( dataset )
+        self.set_dataset_dbkey_metadata( dataset )
+
+    def set_dataset_column_names_metadata( self, dataset ):
+        value_from_comment_metadata = dataset.metadata.comment_metadata.get( 'column_names', None )
+        if isinstance( value_from_comment_metadata, list ):
+            dataset.metadata.column_names = value_from_comment_metadata[:]
+
+    def set_dataset_columnParameter_metadata( self, dataset ):
+        for name, spec in dataset.metadata.spec.items():
+            if isinstance( spec.param, metadata.ColumnParameter ):
+                value_from_comment_metadata = dataset.metadata.comment_metadata.get( name, None )
+                if value_from_comment_metadata is not None:
+                    try:
+                        i = int( value_from_comment_metadata )
+                    except:
+                        i = 0
+                    if 0 <= i <= dataset.metadata.columns:
+                        setattr( dataset.metadata, name, i )
+
+    def set_dataset_species_metadata( self, dataset ):
+        value_from_comment_metadata = dataset.metadata.comment_metadata.get( 'species', None )
+        if isinstance( value_from_comment_metadata, basestring ):
+            dataset.metadata.species = value_from_comment_metadata
+
+    def set_dataset_dbkey_metadata( self, dataset ):
+        value_from_comment_metadata = dataset.metadata.comment_metadata.get( 'dbkey', '?' )
+        if isinstance( value_from_comment_metadata, basestring ):
+            dataset.metadata.dbkey = value_from_comment_metadata
+
+class GDSnp( Fake ):
+    """ Webb's SNP file format """
+    file_ext = 'gd_snp'
+
+    MetadataElement( name="individual_names", desc="individual names", visible=False, readonly=True )
+    MetadataElement( name="individual_columns", desc="individual columns", visible=False, readonly=True )
+
+    def set_dataset_metadata_from_comments( self, dataset ):
+        Fake.set_dataset_metadata_from_comments( self, dataset )
+        self.set_dataset_individual_metadata( dataset )
+
+    def set_dataset_individual_metadata( self, dataset ):
+        individual_list = dataset.metadata.comment_metadata.get( 'individuals', None )
+        if not isinstance( individual_list, list ):
+            individual_list = []
+
+        individual_names = []
+        individual_columns = []
+
+        for individual in individual_list:
+            if not isinstance( individual, list ) or len( individual ) != 2:
+                continue
+            name, col = individual
+            if not isinstance( name, basestring ):
+                name = ''
+            try:
+                c = int( col )
+            except:
+                c = 0
+            if 0 < c <= dataset.metadata.columns:
+                individual_names.append( name )
+                individual_columns.append( c )
+
+        if individual_names:
+            dataset.metadata.individual_names = individual_names[:]
+            dataset.metadata.individual_columns = individual_columns[:]
+
+class GDGenotype( GDSnp ):
+    """ Webb's genotype file format """
+    # Inherits all metadata handling from GDSnp; only the extension differs.
+    file_ext = 'gd_genotype'
+
+class GDSap( Fake ):
+    """ Webb's SAP file format """
+    file_ext = 'gd_sap'
+
+    # Two column-number elements in addition to those declared on Fake;
+    # both default to 0.
+    MetadataElement( name="kegg_gene", desc="KEGG gene code column", param=metadata.ColumnParameter, default=0 )
+    MetadataElement( name="kegg_path", desc="KEGG pathway code/name column", param=metadata.ColumnParameter, default=0 )
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/make_gd_file.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,187 @@
+#!/usr/bin/env python
+
+import base64
+import json
+import math
+import re
+import sys
+
+# One or more ASCII letters, digits or underscores, up to the end of the
+# string.  is_identifier() applies it with .match(), which anchors the start.
+# NOTE(review): '$' also matches just before a single trailing newline, so a
+# value such as 'abc\n' is accepted; use '\Z' if that is not intended.
+identifier_regex = re.compile('[0-9A-Z_a-z]+$')
+
+def unwrap_column_names(string):
+    column_names = []
+    string = unwrap_string(string)
+    for line in string.split('\n'):
+        line = line.strip()
+        if is_identifier(line):
+            column_names.append(line)
+        else:
+            die('invalid column name format: {}'.format(line))
+    return column_names
+
+def unwrap_string(string):
+    try:
+        decoded = base64.b64decode(string)
+    except:
+        die('invalid base64 string: {}'.format(string))
+    return decoded
+
+def is_identifier(string):
+    match = identifier_regex.match(string)
+    if match:
+        return True
+    else:
+        return False
+
+def read_individual_names(filename):
+    tokens = []
+    names = []
+    with open(filename) as fh:
+        for line in fh:
+            line = line.rstrip('\r\n')
+            elems = line.split()
+
+            columns = len(elems)
+            if columns == 0:
+                continue
+
+            first_token = elems[0]
+
+            if columns == 1:
+                name = first_token
+            else:
+                keywords = ' '.join(elems[1:])
+                name = ' '.join([first_token, keywords])
+
+            if first_token not in tokens:
+                tokens.append(first_token)
+                names.append(name)
+            else:
+                die('duplicate first column entry in Names dataset: {}'.format(first_token))
+    return names
+
+def fold_line(line, maxlen, prefix):
+    prefix_len = len(prefix)
+
+    lines = []
+
+    while len(line) > maxlen:
+        split_points = []
+        state = 0
+        for i in range(maxlen - prefix_len):
+            c = line[i]
+            if state == 0:
+                if c == '"':
+                    state = 1
+                elif c in [ '{', ':', ',', '}', '[', ']' ]:
+                    split_points.append(i)
+            elif state == 1:
+                if c == '"':
+                    state = 0
+                elif c == '\\':
+                    state = 2
+            elif state == 2:
+                state = 1
+        idx = split_points[-1]
+        lines.append('{0}{1}'.format(prefix, line[:idx+1]))
+        line = line[idx+1:]
+
+    lines.append('{0}{1}'.format(prefix, line))
+
+    return lines
+
+def die(message):
+    """Print message to standard error and exit the process with status 1."""
+    print >> sys.stderr, message
+    sys.exit(1)
+
+################################################################################
+
+# Number of table columns occupied by each individual, per output format.
+type_to_columns = {
+    'gd_snp':4,
+    'gd_genotype':1
+}
+
+# Exactly eleven positional arguments are required (argv[0] is the script).
+if len(sys.argv) != 12:
+    print >> sys.stderr, 'Usage'
+    sys.exit(1)
+
+input, scaffold_col, pos_col, ref_col, rPos_col, preamble_arg, names, species_arg, dbkey, output_type, output = sys.argv[1:12]
+
+# The preamble columns precede the per-individual columns; column numbers
+# written to the header are 1-based.
+preamble_column_names = unwrap_column_names(preamble_arg)
+first_individual_column = len(preamble_column_names) + 1
+
+individual_names = read_individual_names(names)
+
+# The species arrives base64-encoded and must look like an identifier.
+species = unwrap_string(species_arg)
+if not is_identifier(species):
+    die('invalid species format: {}'.format(species))
+
+if not output_type in type_to_columns:
+    die('unknown output type: {}'.format(output_type))
+columns_per_individual = type_to_columns[output_type]
+
+# jdict collects everything that goes into the JSON comment header.
+jdict = {}
+
+# Column names: the preamble names followed by <n>A/<n>B/<n>G/<n>Q (gd_snp)
+# or <n>G (gd_genotype) for the n-th individual, counting from 1.
+column_names = preamble_column_names[:]
+for i in range(1, len(individual_names) + 1):
+    if output_type == 'gd_snp':
+        column_names.append('{}A'.format(i))
+        column_names.append('{}B'.format(i))
+        column_names.append('{}G'.format(i))
+        column_names.append('{}Q'.format(i))
+    elif output_type == 'gd_genotype':
+        column_names.append('{}G'.format(i))
+    else:
+        # Not reachable in practice: output_type was validated above.
+        die('unknown output type: {}'.format(output_type))
+
+jdict['column_names'] = column_names
+
+# Pair every individual with the 1-based number of its first column.
+individuals = []
+
+for pos, individual in enumerate(individual_names):
+    col = first_individual_column + pos * columns_per_individual
+    individuals.append([individual, col])
+
+jdict['individuals'] = individuals
+
+# These column numbers are stored exactly as given on the command line.
+jdict['scaffold'] = int(scaffold_col)
+jdict['pos'] = int(pos_col)
+jdict['ref'] = int(ref_col)
+jdict['rPos'] = int(rPos_col)
+
+jdict['species'] = species
+jdict['dbkey'] = dbkey
+
+# Compact separators and sorted keys give a deterministic, whitespace-free header.
+json_string = json.dumps(jdict, separators=(',',':'), sort_keys=True)
+
+# From here on pos_col / rPos_col are zero-based list indexes used to
+# validate the data lines.
+min_cols = len(column_names)
+pos_col = int(pos_col) - 1
+rPos_col = int(rPos_col) - 1
+
+def is_int(string):
+    try:
+        int(string)
+        return True
+    except ValueError:
+        return False
+
+with open(output, 'w') as ofh:
+    lines = fold_line(json_string, 200, '#')
+    for line in lines:
+        print >> ofh, line
+
+    with open(input) as fh:
+        line_number = 0
+        for line in fh:
+            line_number += 1
+            if line[0] == '#':
+                continue
+            line = line.rstrip('\r\n')
+            elems = line.split('\t')
+            if len(elems) < min_cols:
+                die('Too few columns on line {0} of input file.  Expecting {1}, saw {2}.'.format(line_number, min_cols, len(elems)))
+            if not is_int(elems[pos_col]):
+                die('bad pos on line {0} column {1} of input file: {2}'.format(line_number, pos_col+1, elems[pos_col]))
+            if not is_int(elems[rPos_col]):
+                die('bad rPos on line {0} column {1} of input file: {2}'.format(line_number, rPos_col+1, elems[rPos_col]))
+            print >> ofh, line
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/make_gd_file.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,101 @@
+<tool id="gd_make_gd_file" name="Make File" version="1.0.0">
+  <description>: Build a gd_snp or gd_genotype file</description>
+
+  <command interpreter="python">
+    #import base64
+    #set $preamble_arg = base64.b64encode(str($preamble_names))
+    #set $species_arg = base64.b64encode(str($species))
+    make_gd_file.py '$input' '$scaffold_col' '$pos_col' '$ref_col' '$rPos_col' '$preamble_arg' '$names' '$species_arg' '$dbkey' '$output_type' '$output'
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="tabular" label="Input dataset" />
+    <param name="scaffold_col" type="data_column" data_ref="input" label="Column with scaffold/contig" />
+    <param name="pos_col" type="data_column" numerical="true" data_ref="input" label="Column with position" />
+    <param name="ref_col" type="data_column" data_ref="input" label="Column with reference species chromosome" />
+    <param name="rPos_col" type="data_column" numerical="true" data_ref="input" label="Column with reference species position" />
+
+    <param name="preamble_names" type="text" area="true" size="5x40" label="Preamble column names">
+      <sanitizer>
+        <valid initial="string.printable"/>
+      </sanitizer>
+    </param>
+    <param name="names" type="data" format="txt" label="Names dataset" />
+
+    <param name="species" type="text" label="Focus species">
+      <sanitizer>
+        <valid initial="string.printable"/>
+      </sanitizer>
+    </param>
+    <param name="dbkey" type="genomebuild" label="Reference species" />
+
+    <param name="output_type" type="select" label="Output format">
+      <option value="gd_snp" selected="true">gd_snp</option>
+      <option value="gd_genotype">gd_genotype</option>
+    </param>
+  </inputs>
+
+  <outputs>
+    <data name="output" format="gd_snp">
+      <change_format>
+        <when input="output_type" value="gd_genotype" format="gd_genotype" />
+      </change_format>
+    </data>
+  </outputs>
+
+  <!--
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
+      <param name="p1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" />
+      <param name="lo_coverage" value="0" />
+      <param name="hi_coverage" value="1000" />
+      <param name="low_ind_cov" value="3" />
+      <param name="lo_quality" value="30" />
+      <output name="output" file="test_out/modify_snp_table/modify.gd_snp" />
+    </test>
+  </tests>
+  -->
+
+  <help>
+**Dataset formats**
+
+The input datasets are in tabular_ and text_ formats.
+The output dataset is in gd_snp_ or gd_genotype_ format.  (`Dataset missing?`_)
+
+.. _tabular: ./static/formatHelp.html#tab
+.. _text: ./static/formatHelp.html#text
+.. _gd_snp: ./static/formatHelp.html#gd_snp
+.. _gd_genotype: ./static/formatHelp.html#gd_genotype
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+This tool simplifies the job of creating a Galaxy file with format gd_snp
+or gd_genotype.  Often, the most complex part of preparing one of these
+files is to specify how individuals are related to columns of the table,
+a task facilitated by this command.  Each gd_snp or gd_genotype file
+typically has columns giving:
+
+1. scaffold/contig name
+2. zero-based position in the scaffold/contig
+3. reference species chromosome
+4. zero-based position in the reference species chromosome
+
+The user needs to specify the columns containing these data.  Columns are
+numbered starting with 1.  The user also specifies brief column names for
+these columns.  When the focus species and the reference species are the
+same, the scaffold/contig name and reference species chromosome columns
+will be identical, as will the position in the scaffold/contig and
+position in the reference species chromosome columns.
+
+To inform Galaxy about the correspondence between individuals and columns
+of the table, the user directs the tool to a history item that lists
+the individuals in order.  Each line starts with a unique name for the
+individual (no embedded space or tab character), followed by an arbitrary
+(possibly empty) set of words that are helpful for specifying groups
+of individuals.
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/make_phylip.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,511 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+#       mkFastas.py
+#
+#       Copyright 2013 Oscar Reina <oscar@niska.bx.psu.edu>
+#
+#       This program is free software; you can redistribute it and/or modify
+#       it under the terms of the GNU General Public License as published by
+#       the Free Software Foundation; either version 2 of the License, or
+#       (at your option) any later version.
+#
+#       This program is distributed in the hope that it will be useful,
+#       but WITHOUT ANY WARRANTY; without even the implied warranty of
+#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#       GNU General Public License for more details.
+#
+#       You should have received a copy of the GNU General Public License
+#       along with this program; if not, write to the Free Software
+#       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+#       MA 02110-1301, USA.
+
+import argparse
+import errno
+import os
+import shutil
+
+def mkdir_p(path):
+    try:
+        os.makedirs(path)
+    except OSError, e:
+        if e.errno <> errno.EEXIST:
+            raise
+
+def revseq(seq):
+    seq=list(seq)
+    seq.reverse()
+    seq=''.join(seq)
+    return seq
+
+def revComp(allPop):
+    dAllCompAll={'A':'T','T':'A','C':'G','G':'C','N':'N','M':'K','K':'M','R':'Y','Y':'R','W':'W','S':'S'}
+    allPopsComp=dAllCompAll[allPop]
+    return allPopsComp
+
+def rtrnCons(ntA,ntB):
+    srtdPairs=''.join(sorted([ntA,ntB]))
+    dpairsCons={'AC':'M', 'AG':'R', 'AT':'W', 'CG':'S', 'CT':'Y', 'GT':'K', 'AN':'A', 'CN':'C', 'GN':'G', 'NT':'T'}
+    cons=dpairsCons[srtdPairs]
+    return cons
+
+def rtrnFxdChrPos(inSNPf,dPopsinSNPfPos,pxchrx,pxpos,pxntA,pxntB,fulldChrdPosdPopsAlllsInit=False,cvrgTreshold=False,indvlsPrctTrshld=False):
+    """
+    """
+    dChrdPosdPopsAlllsInit={}
+    seqref=[]
+    for eachl in open(inSNPf,'r'):
+        if eachl.strip() and eachl[0]!='#':
+            fllInfoSplt=eachl.splitlines()[0].split('\t')
+            chrx=fllInfoSplt[pxchrx]
+            pos=int(fllInfoSplt[pxpos])
+            ntA=fllInfoSplt[pxntA]
+            ntB=fllInfoSplt[pxntB]
+            seqref.append([pos,ntA])
+            dPopsAllls={}
+            if fulldChrdPosdPopsAlllsInit:
+                #~
+                cntIndv=0
+                #
+                try:
+                    fulldPopsAllls=fulldChrdPosdPopsAlllsInit[chrx][pos]
+                except:
+                    fulldPopsAllls=dict([(echPop,ntA) for echPop in dPopsinSNPfPos])
+                #
+                for eachPop in dPopsinSNPfPos:
+                    clmnCvrg=dPopsinSNPfPos[eachPop]
+                    if clmnCvrg:
+                        eachPopCvrg=int(fllInfoSplt[clmnCvrg])
+                    else:
+                        #~ eachPopCvrg=0
+                        eachPopCvrg=cvrgTreshold
+                    if eachPopCvrg>=cvrgTreshold:
+                        dPopsAllls[eachPop]=fulldPopsAllls[eachPop]
+                        cntIndv+=1
+                    else:
+                        dPopsAllls[eachPop]='N'
+                #~
+                if indvlsPrctTrshld>(cntIndv/float(len(dPopsinSNPfPos))):
+                    dPopsAllls=dict([(echPop,'N') for echPop in dPopsinSNPfPos])
+            #~
+            else:
+                for eachPop in dPopsinSNPfPos:
+                    if dPopsinSNPfPos[eachPop]:
+                        eachPopAll=int(fllInfoSplt[dPopsinSNPfPos[eachPop]])
+                        if eachPopAll==0:
+                            dPopsAllls[eachPop]=ntB
+                        elif eachPopAll==2:
+                            dPopsAllls[eachPop]=ntA
+                        elif eachPopAll==1:
+                            dPopsAllls[eachPop]=rtrnCons(ntA,ntB)
+                        else:
+                            dPopsAllls[eachPop]='N'
+                    else:
+                        dPopsAllls[eachPop]=ntA
+            try:
+                dChrdPosdPopsAlllsInit[chrx][pos]=dPopsAllls
+            except:
+                dChrdPosdPopsAlllsInit[chrx]={pos:dPopsAllls}
+    #~
+    seqref.sort()
+    startExs=[seqref[0][0]]
+    endExs=[seqref[-1][0]+1]
+    seqref=''.join(x[1] for x in seqref)
+    #~
+    return dChrdPosdPopsAlllsInit,seqref,chrx,startExs,endExs
+
+
+def rtrndENSEMBLTseq(inCDSfile,inUCSCfile,fchrClmn,txStartClmn,txEndClmn,strandClmn,geneNameClmn,startExsClmn,endExsClmn,cdsStartClmn,cdsEndClmn):
+    """
+    """
+    dENSEMBLTchrxStEndEx={}
+    dChrdStrtEndENSEMBLT={}
+    for eachl in open(inUCSCfile,'r'):
+        if eachl.strip():
+            rvrse=False
+            allVls=eachl.split('\t')
+            txStart=allVls[txStartClmn]
+            txEnd=allVls[txEndClmn]
+            ENSEMBLT=allVls[geneNameClmn]
+            strand=allVls[strandClmn]
+            chrx=allVls[fchrClmn]
+            if cdsStartClmn and cdsEndClmn:
+                cdsStart=allVls[cdsStartClmn]
+                cdsEnd=allVls[cdsEndClmn]
+            if startExsClmn and endExsClmn:
+                startExs=allVls[startExsClmn]
+                endExs=allVls[endExsClmn]
+            if strand=='-':
+                rvrse=True
+            try:
+                dChrdStrtEndENSEMBLT[chrx][int(txStart),int(txEnd)]=ENSEMBLT
+            except:
+                try:
+                    dChrdStrtEndENSEMBLT[chrx]={(int(txStart),int(txEnd)):ENSEMBLT}
+                except:
+                    dChrdStrtEndENSEMBLT={chrx:{(int(txStart),int(txEnd)):ENSEMBLT}}
+            #~
+            if cdsStartClmn and cdsEndClmn and startExsClmn and endExsClmn:
+                startExs,endExs=rtrnExnStarEndCorrc(startExs,endExs,cdsStart,cdsEnd)
+            else:
+                startExs,endExs=[int(txStart)],[int(txEnd)]
+            dENSEMBLTchrxStEndEx[ENSEMBLT]=(chrx,startExs,endExs,rvrse)
+    #~
+    dENSEMBLTseq={}
+    ENSEMBLTseqs=[(x.splitlines()[0],''.join(x.splitlines()[1:])) for x in open(inCDSfile).read().split('>') if x.strip()]
+    for ENSEMBLT,seq in ENSEMBLTseqs:
+        dENSEMBLTseq[ENSEMBLT]=seq
+    #~
+    dENSEMBLTseqChrStEnEx={}
+    for ENSEMBLT in dENSEMBLTchrxStEndEx:
+        chrx,startExs,endExs,rvrse=dENSEMBLTchrxStEndEx[ENSEMBLT]
+        addEseqChrStEnEx=True
+        try:
+            seq=dENSEMBLTseq[ENSEMBLT]
+            if rvrse:
+                seq=revseq(seq)
+        except:
+            addEseqChrStEnEx=False
+        if addEseqChrStEnEx:
+            dENSEMBLTseqChrStEnEx[ENSEMBLT]=(seq,chrx,startExs,endExs,rvrse)
+    return dENSEMBLTseqChrStEnEx,dChrdStrtEndENSEMBLT
+
+
+def rtrnFxdChrPosinCodReg(dChrdStrtEndENSEMBLT,dChrdPosdPopsAlllsInit):
+    """
+    Assign SNP positions to the transcript intervals that contain them.
+
+    dChrdStrtEndENSEMBLT maps chromosome -> {(start, end): transcript} and
+    dChrdPosdPopsAlllsInit maps chromosome -> {position: alleles}.  Both
+    arguments are consumed: their entries are popped while processing.
+
+    Per chromosome, the sorted positions and the sorted intervals are
+    walked in parallel; a position counts as inside an interval when
+    start <= position <= end (both bounds inclusive).
+
+    Returns (dENSEMBLTChrPosdAlls, dChrPosdPopsAllls):
+      * transcript -> {(chromosome, position): alleles}
+      * (chromosome, position) -> alleles, for every assigned position.
+
+    NOTE: uses dict.keys()[0], which only works on Python 2.
+    """
+    dENSEMBLTChrPosdAlls={}
+    dChrPosdPopsAllls={}
+    # chromosomes that have SNPs but no annotated transcript are dropped up front
+    todel=set(dChrdPosdPopsAlllsInit.keys()).difference(set(dChrdStrtEndENSEMBLT.keys()))
+    for x in todel:
+        x=dChrdPosdPopsAlllsInit.pop(x)
+    #---
+    while len(dChrdPosdPopsAlllsInit)>0:
+        # take any remaining chromosome and remove it from both inputs
+        chrx=dChrdPosdPopsAlllsInit.keys()[0]
+        dStrtEndENSEMBLT=dChrdStrtEndENSEMBLT.pop(chrx)
+        dPosdPopsAllls=dChrdPosdPopsAlllsInit.pop(chrx)
+        #~
+        srtdStrtEndENSEMBLT=sorted(dStrtEndENSEMBLT.keys())
+        srtdPosdPopsAllls=sorted(dPosdPopsAllls.keys())
+        # start with the first position and the first interval
+        pos=srtdPosdPopsAllls.pop(0)
+        strt,end=srtdStrtEndENSEMBLT.pop(0)
+        ENSEMBLT=dStrtEndENSEMBLT[strt,end]
+        dPopsAllls=dPosdPopsAllls[pos]
+        keePloop=True
+        #~
+        while keePloop:
+            if strt<=pos<=end:
+                # NOTE(review): the loop below also visits the not yet
+                # processed intervals, but ENSEMBLT is not updated inside it,
+                # so a hit in a later overlapping interval is stored under
+                # the current transcript -- confirm this is intended.
+                for tmpstrt,tmpend in [(strt,end)]+srtdStrtEndENSEMBLT:
+                    if tmpstrt<=pos<=tmpend:
+                        dPopsAllls=dPosdPopsAllls[pos]
+                        dChrPosdPopsAllls[chrx,pos]=dPopsAllls
+                        try:
+                            dENSEMBLTChrPosdAlls[ENSEMBLT][chrx,pos]=dPopsAllls
+                        except:
+                            # first position recorded for this transcript
+                            dENSEMBLTChrPosdAlls[ENSEMBLT]={(chrx,pos):dPopsAllls}
+                    else:
+                        continue
+                # position handled: move on to the next one, if any
+                if len(srtdPosdPopsAllls)>0:
+                    pos=srtdPosdPopsAllls.pop(0)
+                    dPopsAllls=dPosdPopsAllls[pos]
+                else:
+                    keePloop=False
+            #~
+            elif pos<=strt:
+                # position lies before the current interval: skip the position
+                if len(srtdPosdPopsAllls)>0:
+                    pos=srtdPosdPopsAllls.pop(0)
+                    dPopsAllls=dPosdPopsAllls[pos]
+                else:
+                    keePloop=False
+            else:
+                # position lies after the current interval: advance the interval
+                if len(srtdStrtEndENSEMBLT)>0:
+                    strt,end=srtdStrtEndENSEMBLT.pop(0)
+                    ENSEMBLT=dStrtEndENSEMBLT[strt,end]
+                else:
+                    keePloop=False
+    return dENSEMBLTChrPosdAlls,dChrPosdPopsAllls
+
+def rtrnExnStarEndCorrc(startExs,endExs,cdsStart,cdsEnd):
+    """
+    """
+    cdsStart,cdsEnd=int(cdsStart),int(cdsEnd)
+    crrctdstartExs=set([int(x) for x in startExs.split(',') if x.strip()])
+    crrctdendExs=set([int(x) for x in endExs.split(',') if x.strip()])
+    crrctdstartExs.add(cdsStart)
+    crrctdendExs.add(cdsEnd)
+    sStartDel=set()
+    sEndDel=set()
+    #~
+    for echvl in crrctdstartExs:
+        if echvl<cdsStart or echvl>cdsEnd:
+            sStartDel.add(echvl)
+    #~
+    for echvl in crrctdendExs:
+        if echvl<cdsStart or echvl>cdsEnd:
+            sEndDel.add(echvl)
+    #~
+    return sorted(crrctdstartExs.difference(sStartDel)),sorted(crrctdendExs.difference(sEndDel))
+
+def rtrndPopsFasta(seq,chrx,startExs,endExs,rvrse,dChrPosdPopsAllls,ENSEMBLT):
+    """
+    Build one sequence per individual by writing its alleles into seq.
+
+    seq is cut into consecutive pieces whose lengths are the exon lengths
+    (end - start).  The positions in dChrPosdPopsAllls, a dict
+    {(chromosome, position): {individual: allele}}, are then visited in
+    sorted order.  A position that falls inside an exon (start <= position
+    <= end-1) replaces the corresponding character in every individual's
+    copy of that exon; positions outside the exons are skipped.  With
+    rvrse set, alleles are complemented (revComp) before being written and
+    the finished sequences are reversed (revseq).
+
+    dChrPosdPopsAllls is consumed: its entries are popped while processing.
+
+    Returns (dPopsFasta, lchrStartexEndpos): individual -> sequence, and
+    one tab-separated string "transcript, chromosome, position within the
+    sequence, absolute position" per substituted position.
+
+    NOTE: indexes the result of zip(), which only works on Python 2.
+    """
+    exnIntrvl=zip(startExs,endExs)
+    CDSinitPos=exnIntrvl[0][0]
+    # slice seq into per-exon chunks, keyed by the exon interval
+    dexnIntrvlSeq={}
+    for exStart,exEnd in exnIntrvl:
+        lenEx=exEnd-exStart
+        dexnIntrvlSeq[exStart,exEnd]=seq[:lenEx]
+        seq=seq[lenEx:]
+
+    ldexnIntrvlSeq=len(dexnIntrvlSeq)
+    #~
+    dPopsFasta={}
+    #~
+    strePos=set()          # substituted positions, relative to the joined exons
+    dStrePosAbsPos={}      # relative position -> absolute position
+    tmpAcmltdPos=0         # total length of the exons flushed so far
+    # start with the first exon and the first position
+    exStart,exEnd=sorted(dexnIntrvlSeq.keys())[0]
+    seq=dexnIntrvlSeq.pop((exStart,exEnd))
+    chrx,pos=sorted(dChrPosdPopsAllls.keys())[0]
+    dPopsAllls=dChrPosdPopsAllls.pop((chrx,pos))
+    # per-individual mutable copy of the current exon
+    tmpdPopsFasta=dict([(x,list(seq)) for x in dPopsAllls])
+    cntExns=0
+    while True:
+        if  exStart<=pos<=exEnd-1:
+            # position inside the current exon: write every individual's allele
+            relPos=tmpAcmltdPos+pos-exStart
+            strePos.add(relPos)
+            dStrePosAbsPos[relPos]=pos
+            for echPop in tmpdPopsFasta:
+                allPop=dPopsAllls[echPop]
+                if rvrse:
+                    allPop=revComp(allPop)
+                tmpdPopsFasta[echPop][pos-exStart]=allPop
+            if len(dChrPosdPopsAllls)>0:
+                chrx,pos=sorted(dChrPosdPopsAllls.keys())[0]
+                dPopsAllls=dChrPosdPopsAllls.pop((chrx,pos))
+            else:
+                # no positions left: a sentinel beyond the last exon makes the
+                # remaining exons get flushed by the branch below
+                pos=endExs[-1]+100#max pos of exns
+        elif pos<exStart:
+            # position before the current exon: skip it
+            if len(dChrPosdPopsAllls)>0:
+                chrx,pos=sorted(dChrPosdPopsAllls.keys())[0]
+                dPopsAllls=dChrPosdPopsAllls.pop((chrx,pos))
+            else:
+                pos=endExs[-1]+100#max pos of exns
+        elif pos>exEnd-1:# or len(dChrPosdPopsAllls)==0:
+            # position after the current exon: append the finished exon to
+            # every individual's sequence and move to the next exon
+            for echPop in tmpdPopsFasta:
+                try:
+                    dPopsFasta[echPop]+=''.join(tmpdPopsFasta[echPop])
+                except:
+                    dPopsFasta[echPop]=''.join(tmpdPopsFasta[echPop])
+            cntExns+=1
+            tmpAcmltdPos+=len(seq)
+            if len(dexnIntrvlSeq)>0:
+                exStart,exEnd=sorted(dexnIntrvlSeq.keys())[0]
+                seq=dexnIntrvlSeq.pop((exStart,exEnd))
+                tmpdPopsFasta=dict([(x,list(seq)) for x in dPopsAllls])
+            else:
+                break
+    # NOTE(review): the loop above only ends after the last exon has been
+    # flushed, so this looks unreachable unless exon intervals repeat -- confirm.
+    if ldexnIntrvlSeq!=cntExns:
+        for echPop in tmpdPopsFasta:
+            dPopsFasta[echPop]+=''.join(tmpdPopsFasta[echPop])
+    #~
+    lchrStartexEndpos=[]
+    if rvrse:
+        # reverse strand: flip the sequences and mirror the relative positions
+        dPopsFasta=dict([(echPop,revseq(dPopsFasta[echPop])) for echPop in dPopsFasta])#[echPop]+=''.join(tmpdPopsFasta[echPop])
+        for ePos in strePos:
+            lchrStartexEndpos.append('\t'.join([ENSEMBLT,chrx,str(tmpAcmltdPos-ePos-1),str(dStrePosAbsPos[ePos])]))
+    else:
+        for ePos in strePos:
+            lchrStartexEndpos.append('\t'.join([ENSEMBLT,chrx,str(ePos),str(dStrePosAbsPos[ePos])]))
+    #~
+    return dPopsFasta,lchrStartexEndpos
+
+def rtrnSeqVars(dENSEMBLTseqChrStEnEx,dENSEMBLTChrPosdAlls):
+    """
+    """
+    dENSEMBLTPopsFasta={}
+    lchrStartexEndposAll=[]
+    #~
+    sENSEMBLTcmmn=set(dENSEMBLTChrPosdAlls.keys()).intersection(set(dENSEMBLTseqChrStEnEx.keys()))#sENSEMBLTcmmn between UCSC and ENSEMBLE
+    #~
+    for ENSEMBLT in sENSEMBLTcmmn:
+        seq,chrx,startExs,endExs,rvrse=dENSEMBLTseqChrStEnEx[ENSEMBLT]
+        dChrPosdPopsAllls=dENSEMBLTChrPosdAlls[ENSEMBLT]
+        if len(startExs)>0 and len(endExs)>0:
+            dPopsFasta,lchrStartexEndpos=rtrndPopsFasta(seq,chrx,startExs,endExs,rvrse,dChrPosdPopsAllls,ENSEMBLT)
+            lchrStartexEndposAll.extend(lchrStartexEndpos)
+            if dPopsFasta:#to correct a bug of the input table, in cases in which endExons<startExn (!). See ENSCAFT00000000145 (MC4R) in canFam2 for example.
+                dENSEMBLTPopsFasta[ENSEMBLT]=dPopsFasta
+    return dENSEMBLTPopsFasta,lchrStartexEndposAll
+
+
+
+def rtrnPhy(dPopsFasta,ENSEMBLT):
+    """
+    """
+    dPopsFormPhy={}
+    for eachPop in dPopsFasta:
+        hader='%s'%eachPop
+        #~ hader='>%s'%eachPop
+        seq=dPopsFasta[eachPop]
+        formtd='\t'.join([hader,seq])
+        #~ formtd='\n'.join([hader,seq])
+        dPopsFormPhy[eachPop]=formtd
+    #~
+    return dPopsFormPhy,len(seq)
+
+def wrapSeqsFasta(dENSEMBLTPopsFasta,outFastaFold,sPopsIntrst):
+    """
+    """
+    ENSEMBLTKaKs=[]
+    nonHeader=True
+    cnt=0
+    lENSEMBLT=len(dENSEMBLTPopsFasta)
+    #~
+    for ENSEMBLT in sorted(dENSEMBLTPopsFasta.keys()):
+        cnt+=1
+        dPopsFasta=dENSEMBLTPopsFasta[ENSEMBLT]
+        dPopsFormPhy,lenseq=rtrnPhy(dPopsFasta,ENSEMBLT)
+        #~
+        seqPMLformat=['%s %s'%(len(dPopsFormPhy),lenseq)]#generate new PHYML sequence
+        #~ seqPMLformat=[]#generate new PHYML sequence
+        for namex in sorted(sPopsIntrst):
+            seqPMLformat.append(dPopsFormPhy[namex])
+        #~
+        mkdir_p(outFastaFold)
+        outFastaf=os.path.join(outFastaFold,'%s.phy'%ENSEMBLT)
+        outFastaf=open(outFastaf,'w')
+        outFastaf.write('\n'.join(seqPMLformat))
+        outFastaf.close()
+        #~
+    return 0
+
+def main():
+    #~
+    #~bpython mkPhyl.py --input=colugo_mt_Galaxy_genotypes.txt --chrClmn=0 --posClmn=1 --refClmn=2 --altrClmn=3 --output=out.d --gd_indivs=genotypes.gd_indivs --inputCover=colugo_mt_Galaxy_coverage.txt --gd_indivs_cover=coverage.gd_indivs --cvrgTreshold=0 --chrClmnCvrg=0 --posClmnCvrg=1 --refClmnCvrg=2 --altrClmnCvrg=3 --indvlsPrctTrshld=0
+    parser = argparse.ArgumentParser(description='Returns the count of genes in KEGG categories and their statistical overrrepresentation, from a list of genes and an background file (i.e. plane text with ENSEMBLT and KEGG pathways).')
+    parser.add_argument('--input',metavar='input gd_snp file',type=str,help='the input file with the table in gd_snp/gd_genotype format.',required=True)
+    parser.add_argument('--chrClmn',metavar='int',type=int,help='the column with the chromosome.',required=True)
+    parser.add_argument('--posClmn',metavar='int',type=int,help='the column with the SNPs position.',required=True)
+    parser.add_argument('--refClmn',metavar='int',type=int,help='the column with the reference nucleotide.',required=True)
+    parser.add_argument('--altrClmn',metavar='int',type=int,help='the column with the derived nucleotide.',required=True)
+    parser.add_argument('--output',metavar='output',type=str,help='the output',required=True)
+    parser.add_argument('--output_id',metavar='int',type=int,help='the output id',required=True)
+    parser.add_argument('--output_dir',metavar='output folder sequences',type=str,help='the output folder with the sequences.',required=True)
+    parser.add_argument('--gd_indivs',metavar='input gd_indivs file',type=str,help='the input reference species columns in the input file.',required=True)
+    #~
+    parser.add_argument('--inputCover',metavar='input gd_snp cover file',type=str,help='the input file with the table in gd_snp/gd_genotype cover format.',required=False,default=False)
+    parser.add_argument('--gd_indivs_cover',metavar='input gd_indivs file',type=str,help='the input reference species columns in the input cover file.',required=False,default=False)
+    parser.add_argument('--cvrgTreshold',metavar='input coverage threshold',type=int,help='the coverage threshold above which nucleotides are included, else "N".',required=False,default=False)
+    parser.add_argument('--chrClmnCvrg',metavar='int',type=int,help='the column with the chromosome in the input coverage file.',required=False,default=False)
+    parser.add_argument('--posClmnCvrg',metavar='int',type=int,help='the column with the SNPs position in the input coverage file.',required=False,default=False)
+    parser.add_argument('--refClmnCvrg',metavar='int',type=int,help='the column with the reference nucleotide in the input coverage file.',required=False,default=False)
+    parser.add_argument('--altrClmnCvrg',metavar='int',type=int,help='the column with the derived nucleotide in the input coverage file.',required=False,default=False)
+    parser.add_argument('--indvlsPrctTrshld',metavar='int',type=float,help='the percentage of individual above which nucleotides are included, else "N".',required=False,default=False)
+    #~
+    parser.add_argument('--sequence',metavar='input fasta file',type=str,help='the input file with the sequence whose SNPs are in the input.',required=False,default=False)
+    parser.add_argument('--gene_info',metavar='input interval file',type=str,help='the input interval file with the the information on the genes.',required=False,default=False)
+    parser.add_argument('--fchrClmn',metavar='int',type=int,help='the column with the chromosome in the gene_info file.',required=False,default=False)
+    parser.add_argument('--txStartClmn',metavar='int',type=int,help='the column with the transcript start column in the gene_info file.',required=False,default=False)
+    parser.add_argument('--txEndClmn',metavar='int',type=int,help='the column with the transcript end column in the gene_info file.',required=False,default=False)
+    parser.add_argument('--strandClmn',metavar='int',type=int,help='the column with the strand column in the gene_info file.',required=False,default=False)
+    parser.add_argument('--geneNameClmn',metavar='int',type=int,help='the column with the gene name column in the gene_info file.',required=False,default=False)
+    parser.add_argument('--cdsStartClmn',metavar='int',type=int,help='the column with the coding start column in the gene_info file.',required=False,default=False)
+    parser.add_argument('--cdsEndClmn',metavar='int',type=int,help='the column with the coding end column in the gene_info file.',required=False,default=False)
+    parser.add_argument('--startExsClmn',metavar='int',type=int,help='the column with the exon start positions column in the gene_info file.',required=False,default=False)
+    parser.add_argument('--endExsClmn',metavar='int',type=int,help='the column with the exon end positions column in the gene_info file.',required=False,default=False)
+
+    args = parser.parse_args()
+
+    inSNPf = args.input
+    outfile = args.output
+    outfile_id = args.output_id
+    outFastaFold = './out'
+    files_dir = args.output_dir
+    gd_indivs = args.gd_indivs
+    pxchrx = args.chrClmn
+    pxpos = args.posClmn
+    pxntA = args.refClmn
+    pxntB = args.altrClmn
+
+
+    inCDSfile = args.sequence
+    inUCSCfile = args.gene_info
+    fchrClmn = args.fchrClmn#chromosome column
+    txStartClmn = args.txStartClmn#transcript start column
+    txEndClmn = args.txEndClmn#transcript end column
+    strandClmn = args.strandClmn#strand column
+    geneNameClmn = args.geneNameClmn#gene name column
+    cdsStartClmn = args.cdsStartClmn#coding sequence start column
+    cdsEndClmn = args.cdsEndClmn#coding sequence end column
+    startExsClmn = args.startExsClmn#exons start column
+    endExsClmn = args.endExsClmn#exons end column
+
+    inputCover = args.inputCover
+    gd_indivs_cover = args.gd_indivs_cover
+    cvrgTreshold = args.cvrgTreshold
+    pxchrxCov = args.chrClmnCvrg
+    pxposCov = args.posClmnCvrg
+    pxntACov = args.refClmnCvrg
+    pxntBCov = args.altrClmnCvrg
+    indvlsPrctTrshld = args.indvlsPrctTrshld
+
+    #print inputCover, gd_indivs_cover, cvrgTreshold
+
+    assert ((inputCover and gd_indivs_cover and cvrgTreshold>=0 and indvlsPrctTrshld>=0) or (inCDSfile and inUCSCfile))
+
+    #~
+    dPopsinSNPfPos=dict([(x.split()[1],int(x.split()[0])-1) for x in open(gd_indivs).read().splitlines() if x.strip()])
+    #~ dPopsinSNPfPos.update({'ref':False})
+    #~
+    sPopsIntrst=set(dPopsinSNPfPos.keys())
+    dChrdPosdPopsAlllsInit,seqref,chrx,startExs,endExs=rtrnFxdChrPos(inSNPf,dPopsinSNPfPos,pxchrx,pxpos,pxntA,pxntB)#~ print '1. Getting fixed alleles information...'
+    #~ dENSEMBLTseqChrStEnEx,dChrdStrtEndENSEMBLT=rtrndENSEMBLTseq(inCDSfile,inUCSCfile)
+    #~
+    if  inputCover and gd_indivs_cover and cvrgTreshold>=0:
+        dPopsinSNPfPos_cover=dict([(eachPop,False) for eachPop in dPopsinSNPfPos.keys()])
+        dPopsinSNPfPos_cover.update(dict([(x.split()[1],int(x.split()[0])-1) for x in open(gd_indivs_cover).read().splitlines() if x.strip()]))
+        dChrdPosdPopsAlllsInit,seqref,chrx,startExs,endExs=rtrnFxdChrPos(inputCover,dPopsinSNPfPos_cover,pxchrxCov,pxposCov,pxntACov,pxntBCov,dChrdPosdPopsAlllsInit,cvrgTreshold,indvlsPrctTrshld)
+        rvrse=False
+        dENSEMBLTseqChrStEnEx={'tmp':(seqref,chrx,startExs,endExs,rvrse)}
+        dChrdStrtEndENSEMBLT={chrx:{(startExs[0],endExs[0]):'tmp'}}
+    #~
+    elif inCDSfile and inUCSCfile:
+        dENSEMBLTseqChrStEnEx,dChrdStrtEndENSEMBLT=rtrndENSEMBLTseq(inCDSfile,inUCSCfile,fchrClmn,txStartClmn,txEndClmn,strandClmn,geneNameClmn,startExsClmn,endExsClmn,cdsStartClmn,cdsEndClmn)#~ print '2. Getting transcripts and exons information...'
+    #~
+    dENSEMBLTChrPosdAlls,dChrPosdPopsAllls=rtrnFxdChrPosinCodReg(dChrdStrtEndENSEMBLT,dChrdPosdPopsAlllsInit)#~ print '3. Getting fixed alleles in exons...'
+    #~
+    dENSEMBLTPopsFasta,lchrStartexEndposAll=rtrnSeqVars(dENSEMBLTseqChrStEnEx,dENSEMBLTChrPosdAlls)#~ print '4. Getting fasta sequences of populations...'
+    #~
+    wrapSeqsFasta(dENSEMBLTPopsFasta,outFastaFold,sPopsIntrst)
+    #~
+
+
+    ## get a lit of output files
+    files = []
+    for dirpath, dirnames, filenames in os.walk(outFastaFold):
+        for file in filenames:
+            if file.endswith('.phy'):
+                files.append( os.path.join(dirpath, file) )
+        del dirnames[:]
+
+    if len(files) == 0:
+        with open(outfile, 'w') as ofh:
+            print >> ofh, 'No output.'
+    else:
+        ## the first file becomes the output
+        file = files.pop(0)
+        shutil.move(file, outfile)
+
+        ## rename/move the rest of the files
+        for i, file in enumerate(files):
+            new_filename = 'primary_{0}_output{1}_visible_txt_?'.format(outfile_id, i+2)
+            new_pathname = os.path.join(files_dir, new_filename)
+            shutil.move(file, new_pathname)
+
+    return 0
+
+if __name__ == '__main__':
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/make_phylip.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,178 @@
+<tool id="gd_make_phylip" name="Phylip" version="1.0.0" force_history_refresh="True">
+  <description>: prepare data for phylogenetic analysis</description>
+
+  <command interpreter="python">
+    #set $zero_based = 1
+    #set $gen_chrClmn = int($input.metadata.scaffold) - $zero_based
+    #set $gen_posClmn = int($input.metadata.pos) - $zero_based
+    #set $gen_refClmn = int($input.metadata.pos) - $zero_based + 1
+    #set $gen_altrClmn = int($input.metadata.pos) - $zero_based + 2
+    make_phylip.py '--altrClmn=$gen_altrClmn' '--chrClmn=$gen_chrClmn' '--gd_indivs=$indivs_input' '--input=$input' '--output=$output1' '--output_id=$output1.id' '--output_dir=$__new_file_path__' '--posClmn=$gen_posClmn' '--refClmn=$gen_refClmn'
+    #if $input_type.choice == '0'
+      #set $cov_chrClmn = int($input_type.coverage_input.metadata.scaffold) - $zero_based
+      #set $cov_posClmn = int($input_type.coverage_input.metadata.pos) - $zero_based
+      #set $cov_refClmn = int($input_type.coverage_input.metadata.pos) - $zero_based + 1
+      #set $cov_altrClmn = int($input_type.coverage_input.metadata.pos) - $zero_based + 2
+      '--altrClmnCvrg=$cov_altrClmn' '--chrClmnCvrg=$cov_chrClmn' '--cvrgTreshold=$input_type.coverage_threshold' '--gd_indivs_cover=$indivs_input' '--indvlsPrctTrshld=$input_type.indivs_threshold' '--inputCover=$input_type.coverage_input' '--posClmnCvrg=$cov_posClmn' '--refClmnCvrg=$cov_refClmn'
+    #else if $input_type.choice == '1'
+      #set $fchrClmn = int($input_type.annotation_input.metadata.chromCol) - $zero_based
+      #set $strandClmn = int($input_type.annotation_input.metadata.strandCol) - $zero_based
+      #set $geneNameClmn = int($input_type.annotation_input.metadata.nameCol) - $zero_based
+      #set $txStartClmn = int(str($input_type.tx_start_col)) - $zero_based
+      #set $txEndClmn = int(str($input_type.tx_end_col)) - $zero_based
+      #set $cdsStartClmn = int(str($input_type.cds_start_col)) - $zero_based
+      #set $cdsEndClmn = int(str($input_type.cds_end_col)) - $zero_based
+      #set $startExsClmn = int(str($input_type.exs_start_col)) - $zero_based
+      #set $endExsClmn = int(str($input_type.exs_end_col)) - $zero_based
+      '--cdsEndClmn=$cdsEndClmn' '--cdsStartClmn=$cdsStartClmn' '--endExsClmn=$endExsClmn' '--fchrClmn=$fchrClmn' '--geneNameClmn=$geneNameClmn' '--gene_info=$input_type.annotation_input' '--sequence=$input_type.fasta_input' '--startExsClmn=$startExsClmn' '--strandClmn=$strandClmn' '--txEndClmn=$txEndClmn' '--txStartClmn=$txStartClmn'
+    #end if
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="gd_genotype,gd_snp" label="Genotype/SNP dataset">
+      <validator type="metadata" check="scaffold" message="scaffold missing" />
+      <validator type="metadata" check="pos" message="pos missing" />
+    </param>
+    <param name="indivs_input" type="data" format="gd_indivs" label="Individuals dataset" />
+    <conditional name="input_type">
+      <param name="choice" type="select" format="integer" label="Input type">
+        <option value="0" selected="true">Coverage</option>
+        <option value="1">Genes</option>
+      </param>
+      <when value="0">
+        <param name="coverage_input" type="data" format="gd_genotype,gd_snp" label="Coverage dataset">
+          <validator type="metadata" check="scaffold" message="scaffold missing" />
+          <validator type="metadata" check="pos" message="pos missing" />
+        </param>
+        <param name="coverage_threshold" type="integer" min="1" value="1" label="Coverage threshold" />
+        <param name="indivs_threshold" type="float" value="0.5" min="0.0" max="1.0" label="Individuals genotype percentage threshold" />
+      </when>
+      <when value="1">
+        <param name="annotation_input" type="data" format="interval" label="Genes dataset">
+          <validator type="metadata" check="chromCol" message="chromCol missing" />
+          <validator type="metadata" check="strandCol" message="strandCol missing" />
+          <validator type="metadata" check="nameCol" message="nameCol missing" />
+        </param>
+        <!-- The column selectors below are passed to make_phylip.py together with
+             gene_info=annotation_input, so they must list the columns of the genes
+             dataset (annotation_input) and not those of the genotype/SNP dataset. -->
+        <param name="tx_start_col" type="data_column" data_ref="annotation_input" label="Genes transcript start column" />
+        <param name="tx_end_col" type="data_column" data_ref="annotation_input" label="Genes transcript end column" />
+        <param name="cds_start_col" type="data_column" data_ref="annotation_input" label="Genes coding sequence start column" />
+        <param name="cds_end_col" type="data_column" data_ref="annotation_input" label="Genes coding sequence end column" />
+        <param name="exs_start_col" type="data_column" data_ref="annotation_input" label="Genes exon starts column" />
+        <param name="exs_end_col" type="data_column" data_ref="annotation_input" label="Genes exon ends column" />
+        <param name="fasta_input" type="data" format="fasta" label="FASTA dataset" />
+      </when>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <data name="output1" format="txt" />
+  </outputs>
+
+  <help>
+**What it does**
+
+This tool creates phylip formatted files from two different input types:
+coverage and genes.
+
+If the coverage option is selected the inputs for the program are:
+
+   1. a gd_indivs table
+   2. a gd_genotype file with the coverage information for individuals in the gd_indivs table
+   3. a gd_genotype file with the genotype information for individuals in the gd_indivs table
+   4. a coverage threshold (optional)
+   5. a percentage of individuals (threshold).
+
+The program produces a phylip formatted file using the sequence in the
+genotype file as a template.  In this sequence nucleotides for each
+sequence that are below the coverage threshold, or the positions with
+a percentage of individuals below the selected value are replaced by "N".
+
+If the gene option is selected the inputs for the program are:
+
+   1. a gd_indivs table
+   2. a gene dataset table with a gene name in the first column
+   3. the column with transcript start in the gene dataset table
+   4. the column with transcript end in the gene dataset table
+   5. the column with coding start in the gene dataset table
+   6. the column with coding end in the gene dataset table
+   7. the column with exon starts (comma-separated) in the gene dataset table
+   8. the column with exon ends (comma-separated) in the gene dataset table
+   9. a FASTA formatted file for all the genes of interest with their names as headers (NOTE: these names should be the same in the input gene dataset table).
+
+The program produces as output one phylip formatted file for each gene
+in the gene dataset table.
+
+-----
+
+**Example**
+
+In a case where the coverage option is selected, for the inputs:
+
+- gd_indivs::
+
+   7       W_Java
+   10      E_Java
+   16      Pen_Ma
+   ...
+
+- Genotype table::
+
+   chrM 15 T C -1 -1 2 -1 -1 2 -1 -1 -1 -1 -1 2 -1 -1 -1 -1 0 -1 -1
+   chrM 18 G A -1 -1 0 -1 -1 0 -1 -1 -1 -1 -1 0 -1 -1 -1 -1 0 -1 -1
+   chrM 20 C T -1 -1 0 -1 -1 2 -1 -1 -1 -1 -1 0 -1 -1 -1 -1 0 -1 -1
+   ...
+
+- Coverage table::
+
+   chrM 0 G G 0 0 0 0 0 0 0 0 0 0 0 0 0
+   chrM 1 T T 0 0 3 0 0 50 0 0 0 0 0 2 0
+   chrM 2 T T 0 0 5 0 0 50 0 0 0 0 0 2 0
+   ...
+
+- Coverage threshold = 0
+
+- Percentage of individuals = 0.0
+
+- The output is::
+
+   4 19 15428
+   W_Java  GTTCATCATGTTCATCGAAT
+   E_Java  GTTCATCATGTTCATCGAAC
+   Pen_Ma  GTTCATCATGTTCATCGAAT
+
+In a case where the gene option is selected, with the inputs:
+
+- Gene dataset table input::
+
+   1 ENSLAFT00000017123 chrM + 1002 1061 1002 1061 1 1002, 1061, 0 ENSLAFG00000017122 cmpl incmpl 0, BTRC ENSLAFT00000017123 ENSLAFP00000014355
+   1 ENSLAFT00000037164 chrM - 1058 1092 1062 1073 1 1062,1068 1065,1073 0 ENSLAFG00000007680 cmpl cmpl 0, MYOF ENSLAFT00000037164 ENSLAFP00000025175 26509
+   1 ENSLAFT00000008925 chrM + 990 1000 990 1000 1 990, 1000, 0 ENSLAFG00000008924 incmpl incmpl 0, PRKG1 ENSLAFT00000008925 ENSLAFP00000007492
+   ...
+
+In this table:
+
+   column with transcript start = 5
+   column with transcript end = 6
+   column with coding start = 7
+   column with coding end = 8
+   column with exon starts = 10
+   column with exon ends = 11
+
+- gd_indivs::
+
+   7       W_Java
+   10      E_Java
+   16      Pen_Ma
+   ...
+
+- Genotype table::
+
+   chrM 1005 T C -1 -1 2 -1 -1 2 -1 -1 -1 -1 -1 2 -1 -1 -1 -1 0 -1 -1
+   chrM 1060 G A -1 -1 0 -1 -1 0 -1 -1 -1 -1 -1 0 -1 -1 -1 -1 0 -1 -1
+   chrM 991 C T -1 -1 0 -1 -1 2 -1 -1 -1 -1 -1 0 -1 -1 -1 -1 0 -1 -1
+   ...
+
+The output is one file for each sequence in the input gene dataset
+table (as long as it is included in the input FASTA file).
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/map_ensembl_transcripts.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,81 @@
+<tool id="gd_new_oscar" name="Get Pathways" version="1.0.0">
+  <description>: Look up KEGG pathways for given Ensembl transcripts</description>
+
+  <command interpreter="python">
+    rtrnKEGGpthwfENSEMBLTc.py
+      "--loc_file=${GALAXY_DATA_INDEX_DIR}/gd.oscar.loc"
+      "--species=${input.metadata.dbkey}"
+      "--input=${input}"
+      "--posENSEMBLclmn=${ensembl_col}"
+      "--output=${output}"
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="tabular" label="Dataset" >
+       <validator type="unspecified_build" message="This dataset does not have a database/build and cannot be used with this tool" />
+    </param>
+    <param name="ensembl_col" type="data_column" data_ref="input" label="Column with ENSEMBL transcript ID" />
+  </inputs>
+
+  <outputs>
+    <data name="output" format="tabular" />
+  </outputs>
+
+  <!--
+  <tests>
+    <test>
+      <param name="input" value="test_in/ensembl.tabular" ftype="tabular">
+        <metadata name="dbkey" value="canFam2" />
+      </param>
+      <param name="ensembl_col" value="1" />
+
+      <output name="output" file="test_out/map_ensembl_transcripts/map_ensembl_transcripts.tabular" />
+    </test>
+  </tests>
+  -->
+
+  <help>
+
+**Dataset formats**
+
+The input and output datasets are in tabular_ format.
+The input dataset must have a column with an ENSEMBL transcript ID and have
+the database/build set.  Even though positions are not needed the correct
+database/build must be given to look up the pathways.
+The output dataset will have added columns for the pathway.
+(`Dataset missing?`_)
+
+.. _tabular: ./static/formatHelp.html#tab
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+Adds the fields "KEGG gene ID" and "KEGG pathways" to an input table of ENSEMBL
+transcript IDs.  A "U" in the KEGG gene ID field indicates that the
+tool cannot link the ENSEMBL transcript ID to a KEGG gene ID.
+An "N" in the pathway field means the KEGG pathway is unknown.
+
+-----
+
+**Example**
+
+- input::
+
+   ENSCAFT00000000001
+   ENSCAFT00000000144
+   ENSCAFT00000000160
+   ENSCAFT00000000215
+   etc.
+
+- output::
+
+   ENSCAFT00000000001      476153  cfa00230=Purine metabolism.cfa00500=Starch and sucrose metabolism.cfa00740=Riboflavin metabolism.cfa00760=Nicotinate and nicotinamide metabolism.cfa00770=Pantothenate and CoA biosynthesis.cfa01100=Metabolic pathways
+   ENSCAFT00000000144      483960  N
+   ENSCAFT00000000160      610160  N
+   ENSCAFT00000000215      U       N
+   etc.
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/mkpthwpng.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+#       mkpthwpng.py
+#
+#       Copyright 2011 Oscar Bedoya-Reina <oscar@niska.bx.psu.edu>
+#
+#       This program is free software; you can redistribute it and/or modify
+#       it under the terms of the GNU General Public License as published by
+#       the Free Software Foundation; either version 2 of the License, or
+#       (at your option) any later version.
+#
+#       This program is distributed in the hope that it will be useful,
+#       but WITHOUT ANY WARRANTY; without even the implied warranty of
+#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#       GNU General Public License for more details.
+#
+#       You should have received a copy of the GNU General Public License
+#       along with this program; if not, write to the Free Software
+#       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+#       MA 02110-1301, USA.
+
+import argparse,mechanize,os,sys
+
+#this return an image made up from a list of genes and pathway code
+def rtnHTMLformat(tmpddGenrcgenPresent,sppPrefx,pthwcod,ouPthwpng):
+	inpx='\n'.join(tmpddGenrcgenPresent)#inpx="ALDH2 color \nALDH3A1	color"
+	request=mechanize.Request("http://www.genome.jp/kegg/tool/map_pathway2.html")
+	response = mechanize.urlopen(request)
+	forms = mechanize.ParseResponse(response, backwards_compat=False)
+	form=forms[0]
+	form["unclassified"]=inpx
+	form["org"]=sppPrefx
+	request2 = form.click()
+	response2 = mechanize.urlopen(request2)
+	a=str(response2.read()).split('href="/kegg-bin/show_pathway?')[1]
+	code=a.split('/')[0]#response2.read()
+	request=mechanize.Request("http://www.genome.jp/kegg-bin/show_pathway?%s/%s.args"%(code,pthwcod))#request=mechanize.Request("http://www.genome.jp/kegg-bin/show_pathway?%s/%s.args"%('13171478854246','hsa00410'))
+	response = mechanize.urlopen(request)
+	forms = mechanize.ParseResponse(response, backwards_compat=False)
+	form=forms[1]
+	status=' NOT '
+	try:
+		imgf=str(forms[1]).split('/mark_pathway')[1].split('/')[0]
+		os.system("wget --quiet http://www.genome.jp/tmp/mark_pathway%s/%s.png -O %s"%(imgf,pthwcod,ouPthwpng))
+		status=' '
+	except:
+		pass
+	return 'A pathway image was%ssuccefully produced...'%status
+
+
+def main():
+	parser = argparse.ArgumentParser(description='Obtain KEGG images from a list of genes.')
+	parser.add_argument('--input',metavar='input TXT file',type=str,help='the input file with the table in txt format')
+	parser.add_argument('--output',metavar='output PNG image',type=str,help='the output image file in png format')
+	parser.add_argument('--KEGGpath',metavar='KEGG pathway code (i.e. cfa00230)',type=str,help='the code of the pathway of interest')
+	parser.add_argument('--posKEGGclmn',metavar='column number',type=int,help='the column with the KEGG pathway code/name')
+	parser.add_argument('--KEGGgeneposcolmn',metavar='column number',type=int,help='column with the KEGG gene code')
+	#~Open arguments
+	class C(object):
+		pass
+	fulargs=C()
+	parser.parse_args(sys.argv[1:],namespace=fulargs)
+	#test input vars
+	inputf,outputf,KEGGpathw,posKEGGclmn,Kgeneposcolmn=fulargs.input,fulargs.output,fulargs.KEGGpath,fulargs.posKEGGclmn,fulargs.KEGGgeneposcolmn
+    # make posKEGGclmn, Kgeneposcolmn 0-based
+	sppPrefx= KEGGpathw[:3]
+	posKEGGclmn -= 1
+	Kgeneposcolmn -= 1
+	#make a dictionary of valid genes
+	dKEGGcPthws=dict([(x.split('\t')[Kgeneposcolmn],set([y.split('=')[0] for y in x.split('\t')[posKEGGclmn].split('.')])) for x in open(inputf).read().splitlines()[1:] if x.strip()])
+	for mt1gene in [x for x in dKEGGcPthws.keys() if x.find('.')>-1]:#to crrect names with more than one gene
+		pthwsAssotd=dKEGGcPthws.pop(mt1gene)
+		for eachg in mt1gene.split('.'):
+			dKEGGcPthws[eachg]=pthwsAssotd
+	tmpddGenrcgenPresent=set()
+	sKEGGc=dKEGGcPthws.keys()
+	lsKEGGc=len(sKEGGc)
+	ctPthw=0
+	while ctPthw < lsKEGGc:#to save memory
+		eachK=sKEGGc.pop()
+		alPthws=dKEGGcPthws[eachK]
+		if KEGGpathw in alPthws:
+			tmpddGenrcgenPresent.add('\t'.join([eachK,'red']))
+		ctPthw+=1
+	#run the program
+	rtnHTMLformat(tmpddGenrcgenPresent,sppPrefx,KEGGpathw,outputf)
+	return 0
+
+
+if __name__ == '__main__':
+	main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/multiple_to_gd_genotype.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,514 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+#       multiple_to_gd_genotype.py
+#
+#       Copyright 2013 Oscar Reina <oscar@niska.bx.psu.edu>
+#
+#       This program is free software; you can redistribute it and/or modify
+#       it under the terms of the GNU General Public License as published by
+#       the Free Software Foundation; either version 2 of the License, or
+#       (at your option) any later version.
+#
+#       This program is distributed in the hope that it will be useful,
+#       but WITHOUT ANY WARRANTY; without even the implied warranty of
+#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#       GNU General Public License for more details.
+#
+#       You should have received a copy of the GNU General Public License
+#       along with this program; if not, write to the Free Software
+#       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+#       MA 02110-1301, USA.
+
+import argparse
+import base64
+import json
+import os
+import sys
+
+def fold_line(line, maxlen=200, prefix="#"):
+	"""
+	format hader to a 200 char max.
+	"""
+	line_len = len(line)
+	prefix_len = len(prefix)
+
+	if line_len + prefix_len <= maxlen:
+		return '%s%s' % (prefix, line)
+
+	lines = []
+	start_idx = 0
+	offset = 0
+
+	while start_idx < line_len - 1:
+		last_idx = start_idx
+		idx = start_idx
+		start = start_idx
+
+		while idx != -1 and idx < maxlen + prefix_len + offset - 1:
+			last_idx = idx
+			idx = line.find(',', start)
+			start = idx+1
+
+		if idx == -1:
+			lines.append('%s%s' % (prefix, line[start_idx:]))
+			break
+
+		lines.append('%s%s' % (prefix, line[start_idx:last_idx+1]))
+		start_idx = last_idx + 1
+		offset = last_idx + 1
+
+	return '\n'.join(lines)
+
+
+def formthdr(lPops,dbkey,species):
+	"""
+	returns a formated metadata for a gd_genotype file from a paramSet
+	dictionary and a list (lPops) of individuals
+	"""
+	clmnsVals=', '.join(['"%sG"'%(x+1) for x in range(len(lPops))])#"'1G', '2G', ..."
+	indvdls='], ['.join(['"%s", %s'%(lPops[x],x+5) for x in range(len(lPops))])#['DU23M01 Duroc domestic breed Europe', 5], ['DU23M02 Duroc domestic breed Europe', 6], ...
+	obj='{"rPos": 2, "column_names": ["chr", "pos", "A", "B", %s], "scaffold": 1, "pos": 2, "dbkey": "%s", "individuals": [[%s]], "ref": 1, "species": "%s"}'%(clmnsVals,dbkey,indvdls,species)
+	json_value = json.loads(obj)
+	hdr = fold_line(json.dumps(json_value, separators=(',',':'), sort_keys=True))
+	#~
+	return hdr
+
+
+def formthdr_gdsnp(lPops,dbkey,species):
+	"""
+	returns a formated metadata for a gd_genotype file from a paramSet
+	dictionary and a list (lPops) of individuals
+	"""
+	clmnsVals=', '.join(['"%sA","%sB","%sG","%sQ"'%((x+1),(x+1),(x+1),(x+1)) for x in range(len(lPops))])#"'1G', '2G', ..."
+	indvdls='], ['.join(['"%s", %s'%(lPops[x],(((x+1)*4)+2)) for x in range(len(lPops))])#['DU23M01 Duroc domestic breed Europe', 5], ['DU23M02 Duroc domestic breed Europe', 9], ...
+	obj='{"rPos": 2, "column_names": ["chr", "pos", "A", "B", "Q", %s], "scaffold": 1, "pos": 2, "dbkey": "%s", "individuals": [[%s]], "ref": 1, "species": "%s"}'%(clmnsVals,dbkey,indvdls,species)
+	json_value = json.loads(obj)
+	hdr = fold_line(json.dumps(json_value, separators=(',',':'), sort_keys=True))
+	#~
+	return hdr
+
+
+def selAnc(SNPs):
+	"""
+	returns the ancestral and derived snps, and an gd_genotype-encoded
+	list of SNPs/nucleotides
+	"""
+	dIUPAC={'AC':'M','AG':'R','AT':'W','CG':'S','CT':'Y','GT':'K'}#'N':'N','A':'A','T':'T','C':'C','G':'G',
+	dNtsCnts={}
+	for eSNP in SNPs:
+		if eSNP!='N':
+			try:
+				dNtsCnts[eSNP]+=1
+			except:
+				dNtsCnts[eSNP]=1
+	#~
+	nAlleles=len(dNtsCnts.keys())
+	assert nAlleles<=3
+	if nAlleles==3:
+		nonCons=[x for x in dNtsCnts.keys() if x in set(['A','T','C','G'])]
+		cons=[x for x in dNtsCnts.keys() if x not in nonCons]
+		assert len(nonCons)==2 and len(cons)==1 and dIUPAC[''.join(sorted(nonCons))]==''.join(cons)
+		ancd=nonCons[0]
+		dervd=nonCons[1]
+		if dNtsCnts[dervd]>dNtsCnts[ancd]:
+			ancd,dervd=dervd,ancd
+	elif nAlleles==2:
+		cons=set(dNtsCnts.keys()).intersection(set(dIUPAC.values()))
+		assert len(cons)<=1
+		if len(cons)==0:
+			ancd,dervd=dNtsCnts.keys()
+			if dNtsCnts[dervd]>dNtsCnts[ancd]:
+				ancd,dervd=dervd,ancd
+		else:
+			dervd=''.join(cons)
+			ancd=''.join([x for x in dNtsCnts.keys() if x!=dervd])
+	else:#<=1
+		ancd=''.join(dNtsCnts.keys())
+		dervd='N'
+	#~
+	lSNPsEncoded=[]
+	for eSNP in SNPs:
+		if eSNP=='N':
+			lSNPsEncoded.append('-1')
+		elif eSNP==ancd:
+			lSNPsEncoded.append('2')
+		elif eSNP==dervd:
+			lSNPsEncoded.append('0')
+		else:
+			lSNPsEncoded.append('1')
+	#~
+	return ancd,dervd,lSNPsEncoded
+
+
+
+def from_csv(in_csv,outgd_gentp,dbkey,species):
+	"""
+	returns a gd_genotype file format from csv file (saved in excel).
+	The input must consist of a set of rows with columns splited by a
+	comma. The first row must contain the names of the individuals. For
+	the other rows, the first of column must contain the chromosome and
+	position of the snp. The other columns must contain any kind of
+	fstat or genepop allele valid encoding, with at	most 2 alleles. Also,
+	the user may input IUPAC valid nucleotide symbols. The program will
+	assume that the mosts common is the ancestral.
+	------------- The file starts here ----------------
+	,Ind_1,Ind_2,Ind_3,Ind_4,Ind_5,Ind_6,Ind_7
+	chr1 12334123,A,T,T,A,T,T,W
+	chr2 1232654,C,G,G,C,N,S,N
+	chr3    3356367,T,G,G,G,G,K,K
+	chr4    95673,A,C,C,A,C,M,M
+	chr5 45896,T,C,Y,Y,Y,C,T
+	...
+	or
+	...
+	chr6 2354,22,11,21,00,12,12,22
+	------------- The file ends here -------------------
+	"""
+	infoLn=True
+	slf=open(outgd_gentp,'w')
+	for echl in open(in_csv,'r'):
+		if echl.strip():
+			if infoLn:
+				lPops=[x for x in echl.splitlines()[0].split(',') if x.strip()]
+				hdr=formthdr(lPops,dbkey,species)
+				slf.write('%s\n'%hdr)
+				infoLn=False
+			else:
+				lsplitd=echl.splitlines()[0].split(',')
+				lchrpox,SNPs=lsplitd[0],[x for x in lsplitd[1:] if x.strip()]
+				lchrpox='\t'.join(lchrpox.strip().split())
+				if SNPs[0].isdigit():
+					lSNPsEncoded=[]
+					for snp in SNPs:
+						cnt=0
+						for ep in snp:
+							ep=int(ep)
+							assert ep<=2
+							cnt+=ep
+						cnt=4-cnt
+						if cnt==4:
+							frmtdSNP='-1'
+						else:
+							frmtdSNP=str(cnt)
+						lSNPsEncoded.append(frmtdSNP)
+					ancd,dervd='N','N'
+				else:
+					ancd,dervd,lSNPsEncoded=selAnc(SNPs)
+				outfrmtdDat='%s\t%s\t%s\t%s'%(lchrpox,ancd,dervd,'\t'.join(lSNPsEncoded))
+				slf.write('%s\n'%outfrmtdDat)
+	#~
+	slf.close()
+	return 0
+
+
+def from_fstat(in_fstat,outgd_gentp,dbkey,species):
+	"""
+	returns a gd_genotype file format from fstat file. Ignores pops
+	structures and alleles other than the combinations of the alleles
+	encoded by 0, 1, and 2 up to 9 digits. The first line contains 4
+	numbers separated by any number of spaces: number of samples, np,
+	number of loci, nl, highest number used to label an	allele, nu
+	(?=2), and 1 if the code for alleles is a one digit	number (1-2), a
+	2 if code for alleles is a 2 digit number (01-02) or a 3 if code for
+	alleles is a 3 digit number (001-002). Followed by nl lines: each
+	containing the name of a locus, in the order they will appear in the
+	rest of the file On line nl+2, a series of numbers as follow: 1 0102
+	0101 0101 0201 0 0101 first number: identifies the sample to which
+	the individual belongs second: the genotype of the individual at the
+	first locus, coded with a 2 digits number for each allele third: the
+	genotype at the second locus, until locus nl is entered. Missing
+	genotypes are encoded with 0 (0001 or 0100 are not a valid format,
+	so both alleles at a locus have to be known, otherwise, the genotype
+	is considered as missing) no empty lines are needed between samples
+	number of spaces between genotypes can be anything. Numbering of
+	samples need not be sequential the number of samples np needs to be
+	the same as the largest sample identifier. Samples need not to be
+	ordered nu needs to be equal to the largest code given to an allele
+	(even if there are less than nu alleles). Ancestral is taken as 01,
+	derived 02. In all cases ancestral and derived SNPs are returned as
+	N.
+	------------- The file starts here ----------------
+	7  5  2  1
+	chr1 12334123
+	chr2 1232654
+	chr3    3356367
+	chr4    95673
+	chr5 45896
+	   Ind_1   22 22 21 11 22
+	   Ind_2   22 22 11 12 22
+	   Ind_3   22 11 22 21 22
+	   Ind_4   22 21 22 21 22
+	   Ind_5   22 22 22 21 22
+	   Ind_6   22 22 22 22 22
+	   Ind_7   22 22 21 21 22
+	------------- The file ends here -------------------
+	"""
+	dChrPos,lPops,lChrPos,addPop={},[],[],False
+	clines=-1
+	for echl in open(in_fstat,'r'):
+		clines+=1
+		if echl.strip():
+			if clines==0:
+				nSmpls,nLoci,nUsed,nDigs=[x for x in echl.splitlines()[0].split() if x.strip()]
+				nLoci=int(nLoci)
+			elif clines<=nLoci:
+				addPop=True
+				lchrpox='\t'.join(echl.strip().split())
+				lChrPos.append(lchrpox)
+			elif addPop:
+				lsplitd=echl.splitlines()[0].split()
+				pop,SNPs=lsplitd[0],[x for x in lsplitd[1:] if x.strip()]
+				pop=pop.strip()
+				for x in range(nLoci):
+					snp=SNPs[x]
+					cnt=0
+					for ep in snp:
+						ep=int(ep)
+						assert ep<=2
+						cnt+=ep
+					cnt=4-cnt
+					if cnt==4:
+						frmtdSNP='-1'
+					else:
+						frmtdSNP=str(cnt)
+					try:
+						dChrPos[lChrPos[x]].append(frmtdSNP)
+					except:
+						dChrPos[lChrPos[x]]=[frmtdSNP]
+				#~
+				lPops.append(pop)
+	#~
+	hdr=formthdr(lPops,dbkey,species)
+	outfrmtdDat=['%s\t%s\t%s\t%s'%(x,'N','N','\t'.join(dChrPos[x])) for x in lChrPos]
+	#~
+	slf=open(outgd_gentp,'w')
+	slf.write('\n'.join([hdr,'\n'.join(outfrmtdDat)]))
+	slf.close()
+	return 0
+
+
+def from_genepop(in_genepop,outgd_gentp,dbkey,species):
+	"""
+	returns a gd_genotype file format from genepop file . Ignores pops
+	structures and alleles other than the combinations of the alleles
+	encoded by 00, 01, and 02. The second line must contain the chromosome
+	and position of the SNPs separated by an space or a tab. Each loci
+	should be separated by a comma. Alternatively, they may be given one
+	per line. They may be given one per line, or on the same line but
+	separated by commas. The name of individuals are defined as
+	everything on the left of a comma, and their genotypes following the
+	same order of the SNPs in the second line. Ancestral is taken as 01,
+	derived 02. In all cases ancestral and derived SNPs are returned as N
+		------------- The file starts here ----------------
+	Microsat on Chiracus radioactivus, a pest species
+		 chr1 23123, chr2 90394, chr3 90909, chr3 910909, chr4 10909
+	POP
+	AA2, 0201 0111 0102 0000      0101
+	AA1, 0201 0201 0202 0000      0101
+	A10, 0201 0201 0101 0000      0101
+	A11, 0201 0202 0102 0000      0102
+	A12, 0202 0201 0101 0000      0101
+	A11, 0101 0101 0101 0000      0101
+	A12, 0202 0201 0201 0000      0101
+	A11, 0201 0201 0101 0000      0101
+	Pop
+	AF1, 0000 0000 0000 0000      0101
+	AF2, 0201 0101 0102 0000      0101
+	AF3, 0202 0201 0202 0000      0101
+	AF4, 0201 0101 0000 0000      0101
+	AF5, 0201 0101 0202 0000      0101
+	AF6, 0101 0101 0102 0000      0101
+	AF7, 0201 0100 0000 0000      0101
+	AF8, 0101 0100 0000 0000      0201
+	AF9, 0201 0200 0000 0000      0101
+	AF10, 0101 0202 0202 0000      0101
+	pop
+	C211, 0101 0202 0202 0000      0101
+	C211, 0101 0101 0202 0000      0101
+	C21E, 0101 0102 0202 0000      0101
+	C21B, 0101 0101 0102 0000      0201
+	C21C, 0201 0101 0202 0000      0101
+	C21D, 0201 0101 0202 0000      0201
+	------------- The file ends here -------------------
+	"""
+	dChrPos,lPops,lChrPos,addPop,addDat={},[],[],False,True
+	clines=-1
+	for echl in open(in_genepop,'r'):
+		clines+=1
+		if echl.strip():
+			if echl.strip() in set(['pop','POP','Pop']):
+				addDat,addPop=False,True
+			elif addDat and clines>0:
+				lchrpox=['\t'.join(x.split()) for x in echl.split(',') if x.strip()]
+				lChrPos.extend(lchrpox)
+			elif addPop:
+				pop,SNPs=echl.splitlines()[0].split(',')
+				pop=pop.strip()
+				SNPs=[x for x in SNPs.split() if x.strip()]
+				for x in range(len(SNPs)):
+					snp=SNPs[x]
+					cnt=0
+					for ep in snp:
+						ep=int(ep)
+						assert ep<=2
+						cnt+=ep
+					cnt=4-cnt
+					if cnt==4:
+						frmtdSNP='-1'
+					else:
+						frmtdSNP=str(cnt)
+					try:
+						dChrPos[lChrPos[x]].append(frmtdSNP)
+					except:
+						dChrPos[lChrPos[x]]=[frmtdSNP]
+				#~
+				lPops.append(pop)
+	#~
+	hdr=formthdr(lPops,dbkey,species)
+	outfrmtdDat=['%s\t%s\t%s\t%s'%(x,'N','N','\t'.join(dChrPos[x])) for x in lChrPos]
+	#~
+	slf=open(outgd_gentp,'w')
+	slf.write('\n'.join([hdr,'\n'.join(outfrmtdDat)]))
+	slf.close()
+	return 0
+
+
+def from_vcf(inVCF,outgd_gentp,dbkey,species):
+	"""
+	returns a gd_genotype file format from vcf a file
+	"""
+	slf=open(outgd_gentp,'w')
+	paramSet,addPop,adinfo=False,False,False
+	lPops=[]
+	for echl in open(inVCF,'r'):
+		if echl.strip():
+			if not paramSet:
+				if echl.find('##')==0:
+					pass
+				elif echl.find('#')==0:
+					paramSet={}
+					all_params=[x for x in echl.split() if x.strip()]
+					clmn=-1
+					for eparam in all_params:
+						clmn+=1
+						if eparam=='#CHROM':
+							paramSet['chr']=clmn
+						elif eparam=='POS':
+							paramSet['pos']=clmn
+						elif eparam=='REF':
+							paramSet['A']=clmn
+						elif eparam=='ALT':
+							paramSet['B']=clmn
+						elif eparam=='QUAL':
+							paramSet['qual']=clmn
+						elif eparam=='FORMAT':
+							paramSet['frmt']=clmn
+							addPop=True
+						elif addPop:
+							lPops.append(eparam)
+							paramSet[eparam]=clmn
+					if paramSet:
+						hdr=formthdr(lPops,dbkey,species)
+						slf.write('%s\n'%hdr)
+			else:
+				all_vals=[x for x in echl.split() if x.strip()]
+				frmt=[x for x in all_vals[paramSet['frmt']].split(':') if x.strip()]
+				clmn=-1
+				gtclmn,adclmn,qulclmn=False,False,False
+				for p in frmt:
+					clmn+=1
+					if p=='GT':
+						gtclmn=clmn
+					elif p=='AD':
+						adclmn=clmn
+						adinfo=True
+					elif p=='GQ':
+						qulclmn=clmn
+				#~
+				if adinfo:
+					outptInfo=[all_vals[paramSet['chr']],all_vals[paramSet['pos']],all_vals[paramSet['A']],all_vals[paramSet['B']],all_vals[paramSet['qual']]]
+					for ePop in lPops:
+						gntyp=all_vals[paramSet[ePop]].replace('|','/').split(':')[gtclmn].split('/')
+						encdGntyp,adA,adB,qual='-1','0','0','-1'
+						#~
+						if set(gntyp)!=set(['.']):
+							encdGntyp=2-sum([int(x) for x in gntyp])
+							if adclmn:
+								try:
+									adA,adB=all_vals[paramSet[ePop]].split(':')[adclmn].split(',')
+								except:
+									pass
+							if qulclmn:
+								try:
+									qual=all_vals[paramSet[ePop]].split(':')[qulclmn]
+								except:
+									pass
+						outptInfo.extend([adA,adB,str(encdGntyp),qual])
+					slf.write('%s\n'%'\t'.join(outptInfo))
+				#~
+				else:
+					outptInfo=[all_vals[paramSet['chr']],all_vals[paramSet['pos']],all_vals[paramSet['A']],all_vals[paramSet['B']]]
+					for ePop in lPops:
+						gntyp=all_vals[paramSet[ePop]].replace('|','/').split(':')[gtclmn].split('/')
+						try:
+							encdGntyp=2-sum([int(x) for x in gntyp])
+						except:
+							encdGntyp=-1
+						outptInfo.append(str(encdGntyp))
+					#~
+					slf.write('%s\n'%'\t'.join(outptInfo))
+	slf.close()
+	#~
+	if adinfo:
+		hdr=formthdr_gdsnp(lPops,dbkey,species)
+		slf=open('%stmp'%outgd_gentp,'w')
+		slf.write('%s\n'%hdr)
+		appnd=False
+		for echl in open(outgd_gentp,'r'):
+			if appnd:
+				slf.write(echl)
+			else:
+				if echl[0]!='#':
+					appnd=True
+					slf.write(echl)
+		slf.close()
+		#~
+		os.system('mv %stmp %s'%(outgd_gentp,outgd_gentp))
+	#~
+	return 0
+
+
+def main():
+	#~
+	parser = argparse.ArgumentParser(description='Returns the count of genes in KEGG categories and their statistical overrrepresentation, from a list of genes and an background file (i.e. plane text with ENSEMBLT and KEGG pathways).')
+	parser.add_argument('--input',metavar='input TXT file',type=str,help='the input file with the table in VCF format.',required=True)
+	parser.add_argument('--output',metavar='output TXT file',type=str,help='the output file with the table in gd_genotype format.',required=True)
+	parser.add_argument('--dbkey',metavar='string',type=str,help='the input reference species dbkey (i.e. susScr3).',required=True)
+	parser.add_argument('--species',metavar='string',type=str,help='the input reference species name (i.e. int).',required=True)
+	parser.add_argument('--format',metavar='string',type=str,help='format of the input file (i.e. vcf, genepop, fstat, csv).',required=True)
+
+	args = parser.parse_args()
+
+	infile = args.input
+	outgd_gentp = args.output
+	dbkey = args.dbkey
+	species = base64.b64decode(args.species)
+	frmat = args.format
+
+	#~
+	if frmat=='vcf':
+		from_vcf(infile,outgd_gentp,dbkey,species)
+	elif frmat=='genepop':
+		from_genepop(infile,outgd_gentp,dbkey,species)
+	elif frmat=='fstat':
+		from_fstat(infile,outgd_gentp,dbkey,species)
+	elif frmat=='csv':
+		from_csv(infile,outgd_gentp,dbkey,species)
+
+	#~
+	return 0
+
+if __name__ == '__main__':
+	main()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/multiple_to_gd_genotype.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,171 @@
+<tool id="gd_multiple_to_gd_genotype" name="Convert" version="1.0.0">
+  <description>: CSV, FSTAT, Genepop or VCF to either gd_snp or gd_genotype</description>
+
+  <command interpreter="python">
+    #import base64
+    #set species_arg = base64.b64encode(str($species))
+    multiple_to_gd_genotype.py --input '$input' --output '$output' --dbkey '$dbkey' --species '$species_arg' --format '$input_format'
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="txt" label="Input dataset" />
+
+    <param name="input_format" type="select" label="Input format">
+      <option value="csv" selected="true">CSV</option>
+      <option value="fstat">FSTAT</option>
+      <option value="genepop">Genepop</option>
+      <option value="vcf">VCF</option>
+    </param>
+
+    <param name="species" type="text" label="Focus species">
+      <sanitizer>
+        <valid initial="string.printable"/>
+      </sanitizer>
+    </param>
+
+    <param name="dbkey" type="genomebuild" label="Reference species" />
+  </inputs>
+
+  <outputs>
+    <data name="output" format="gd_genotype" />
+  </outputs>
+
+  <!--
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
+      <param name="p1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" />
+      <param name="lo_coverage" value="0" />
+      <param name="hi_coverage" value="1000" />
+      <param name="low_ind_cov" value="3" />
+      <param name="lo_quality" value="30" />
+      <output name="output" file="test_out/modify_snp_table/modify.gd_snp" />
+    </test>
+  </tests>
+  -->
+
+  <help>
+
+**Dataset formats**
+
+The input dataset is formatted as VCF, FSTAT, Genepop, or CSV, and is of
+Galaxy datatype text_.  Additionally, the name of the focus species (from
+which the SNPs in the VCF file were obtained) and a reference species
+are required.  The output dataset is in gd_genotype_ or gd_snp_ format.
+
+For input datasets in Genepop, FSTAT, or CSV formats, the program ignores
+population structures as well as alleles other than those encoded by 0,
+1, and 2.  For input datasets in FSTAT format the program accepts up to
+9 digits and for Genepop files only 2 digits.  Chromosome and position
+for each SNP must be separated by a space or a tab.  Ancestral loci must
+be encoded as 1, derived as 2 and missing as 0.  In all cases ancestral
+and derived SNPs are returned as N.  Alternatively, a dataset in CSV
+format can include nucleotides.  In this case the ancestral nucleotide
+is defined as the most common allele.
+
+.. _text: ./static/formatHelp.html#text
+.. _gd_snp: ./static/formatHelp.html#gd_snp
+.. _gd_genotype: ./static/formatHelp.html#gd_genotype
+
+-----
+
+**What it does**
+
+This tool returns a gd_genotype dataset from VCF formatted files or three
+other conventional population genetics formats (i.e. FSTAT, Genepop,
+and CSV).  For VCF files that include the fields allelic depth, genotype
+quality and genotype ("AD", "GQ", and "GT" respectively in the "FORMAT"
+field) the input dataset can be converted into a gd_snp file.
+
+-----
+
+**Examples**
+
+- If the input format is VCF and includes the fields allelic depth, genotype quality and genotype ("AD", "GQ", and "GT" respectively in the "FORMAT" field).  Focus species name is "aye-aye" and reference species is "Human Feb. 2009 (GRCh37/hg19) (hg19)"::
+
+   #CHROM POS    ID          REF ALT QUAL    FILTER INFO FORMAT         19_F                    19.1_F             19.2_F
+   Chr21  382242 rs134033430 T   C   3296.97 .      .    GT:GQ:DP:PL:AD 1/1:75:26:943,75,0:0,26 1/1:3:1:30,3,0:0,1 ./.
+   Chr21  383680 rs137652597 T   C   2236.62 .      .    GT:GQ:DP:PL:AD 1/1:36:12:436,36,0:0,12 ./.                1/1:3:1:31,3,0:0,1
+   Chr21  387251 .           G   T   2407.88 .      .    GT:GQ:DP:PL:AD 1/1:30:12:394,30,0:0,10 ./.                ./.
+   etc.
+
+- output (gd_snp)::
+
+   #{"column_names":["chr","pos","A","B","Q","1A","1B","1G","1Q","2A","2B","2G","2Q","3A","3B","3G","3Q"],"dbkey":"aye-aye","individuals":[["19_F",6],["19.1_F",10],["19.2_F",14]],"pos":2,"rPos":2,"ref":1,
+   #"scaffold":1,"species":"hg19"}
+   Chr21   382242  T       C       3296.97 0       26      0       75      0       1       0       3       0       0       -1      -1
+   Chr21   383680  T       C       2236.62 0       12      0       36      0       0       -1      -1      0       1       0       3
+   Chr21   387251  G       T       2407.88 0       10      0       30      0       0       -1      -1      0       0       -1      -1
+   etc.
+
+- If the input format is VCF, focus species is "aye-aye" and reference species is "Human Feb. 2009 (GRCh37/hg19) (hg19)"::
+
+   #CHROM POS    ID          REF ALT QUAL    FILTER INFO FORMAT         19_F                    19.1_F             19.2_F
+   Chr21  382242 rs134033430 T   C   3296.97 .      .    GT:GQ:DP:PL    1/1:75:26:943,75,0      1/1:3:1:30,3,0:0,1 ./.
+   Chr21  383680 rs137652597 T   C   2236.62 .      .    GT:GQ:DP:PL    1/1:36:12:436,36,0      ./.                1/1:3:1:31,3,0:0,1
+   Chr21  387251 .           G   T   2407.88 .      .    GT:GQ:DP:PL    1/1:30:12:394,30,0      ./.                ./.
+   etc.
+
+- output (gd_genotype)::
+
+   #{"column_names":["chr","pos","A","B","1G","2G","3G"],"dbkey":"aye-aye","individuals":[["19_F",5],["19.1_F",6],["19.2_F",7]],"pos":2,"rPos":2,"ref":1,"scaffold":1,"species":"hg19"}
+   Chr21   382242  T       C       0       0       -1
+   Chr21   383680  T       C       0       -1      0
+   Chr21   387251  G       T       0       -1      -1
+   etc.
+
+- If the input format is Genepop::
+
+   Microsat on aye-aye from different locations
+        Chr21   382242, Chr21   383680, Chr21   387251
+   POP
+   19_F, 0202 0202 0202
+   Pop
+   19.1_F, 0202 0000 0000
+   19.2_F, 0000 0202 0000
+   etc.
+
+- or the input format is FSTAT::
+
+   300  3  2  1
+   Chr21   382242
+   Chr21   383680
+   Chr21   387251
+      19_F   22 22 22
+      19.1_F   22 00 00
+      19.2_F   00 22 00
+   etc.
+
+- or the input format is CSV::
+
+   ,19_F,19.1_F,19.2_F,...
+   Chr21   382242,22,22,00
+   Chr21   383680,22,00,22
+   Chr21   387251,22,00,00
+   etc.
+
+- output (gd_genotype)::
+
+   #{"column_names":["chr","pos","A","B","1G","2G","3G"],"dbkey":"aye-aye","individuals":[["19_F",5],["19.1_F",6],["19.2_F",7]],"pos":2,"rPos":2,"ref":1,"scaffold":1,"species":"hg19"}
+   Chr21   382242  N       N       0       0       -1
+   Chr21   383680  N       N       0       -1      0
+   Chr21   387251  N       N       0       -1      -1
+   etc.
+
+- if the input format is CSV::
+
+   ,19_F,19.1_F,19.2_F,...
+   Chr21   382242,C,C,N
+   Chr21   383680,C,N,C
+   Chr21   387251,T,N,N
+   etc.
+
+- output (gd_genotype)::
+
+   #{"column_names":["chr","pos","A","B","1G","2G","3G","4G"],"dbkey":"aye-aye","individuals":[["19_F",5],["19.1_F",6],["19.2_F",7],["...",8]],"pos":2,"rPos":2,"ref":1,"scaffold":1,"species":"hg19"}
+   Chr21   382242  C       N       2       2       -1
+   Chr21   383680  T       N       2       -1      2
+   Chr21   387251  T       N       2       -1      -1
+   etc.
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/nucleotide_diversity_pi.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+
+import gd_util
+import sys
+from Population import Population
+
+################################################################################
+
+if len(sys.argv) != 7:
+    gd_util.die('Usage')
+
+gd_saps_file, gd_snps_file, covered_intervals_file, gd_indivs_file, output_file, ind_arg = sys.argv[1:]
+
+p_total = Population()
+p_total.from_wrapped_dict(ind_arg)
+
+p1 = Population()
+p1.from_population_file(gd_indivs_file)
+if not p_total.is_superset(p1):
+    gd_util.die('There is an individual in the population individuals that is not in the SNP table')
+
+################################################################################
+
+prog = 'get_pi'
+
+args = [ prog ]
+args.append(gd_saps_file)
+args.append(gd_snps_file)
+args.append(covered_intervals_file)
+
+columns = p1.column_list()
+for column in columns:
+    args.append(column)
+
+with open(output_file, 'w') as fh:
+    gd_util.run_program(prog, args, stdout=fh)
+
+sys.exit(0)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/nucleotide_diversity_pi.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,43 @@
+<tool id="gd_nucleotide_diversity_pi" name="Nucleotide Diversity" version="1.0.0">
+  <description>: &amp;pi; and &amp;theta;</description>
+
+  <command interpreter="python">
+    #import json
+    #import base64
+    #import zlib
+    #set $ind_names = $snps.dataset.metadata.individual_names
+    #set $ind_colms = $snps.dataset.metadata.individual_columns
+    #set $ind_dict = dict(zip($ind_names, $ind_colms))
+    #set $ind_json = json.dumps($ind_dict, separators=(',',':'))
+    #set $ind_comp = zlib.compress($ind_json, 9)
+    #set $ind_arg = base64.b64encode($ind_comp)
+    nucleotide_diversity_pi.py '$saps' '$snps' '$intervals' '$indivs' '$output' '$ind_arg'
+  </command>
+
+  <inputs>
+    <param name="saps" type="data" format="gd_sap" label="SAP Dataset" />
+    <param name="snps" type="data" format="gd_snp" label="SNP Dataset" />
+    <param name="intervals" type="data" format="gd_covered_cds" label="Covered intervals" />
+    <param name="indivs" type="data" format="gd_indivs" label="Population individuals" />
+  </inputs>
+
+  <outputs>
+    <data name="output" format="txt" />
+  </outputs>
+
+  <requirements>
+    <requirement type="package" version="0.1">gd_c_tools</requirement>
+  </requirements>
+
+  <help>
+**What it does**
+
+This tool computes values that estimate some basic parameters.
+
+**Output**
+
+The number of nonsynonymous SNPs, the total number of nonsynonymous sites,
+piNon, the number of synonymous SNPs, the total number of synonymous sites,
+piSyn, plus the total length of covered intervals, thetaNon, and thetaSyn.
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/offspring_heterozygosity.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+
+import sys
+import gd_util
+
+from Population import Population
+
+################################################################################
+
+if len(sys.argv) != 7:
+    gd_util.die('Usage')
+
+input, input_type, ind_arg, p1_input, p2_input, output = sys.argv[1:]
+
+p_total = Population()
+p_total.from_wrapped_dict(ind_arg)
+
+p1 = Population()
+p1.from_population_file(p1_input)
+if not p_total.is_superset(p1):
+    gd_util.die('There is an individual in the first population that is not in the SNP table')
+
+p2 = Population()
+p2.from_population_file(p2_input)
+if not p_total.is_superset(p2):
+    gd_util.die('There is an individual in the second population that is not in the SNP table')
+
+################################################################################
+
+prog = 'offspring_heterozygosity'
+
+args = [ prog ]
+args.append(input)  # a Galaxy SNP table
+
+for tag in p1.tag_list():
+    column, name = tag.split(':')
+
+    if input_type == 'gd_genotype':
+        column  = int(column) - 2
+
+    tag = '{0}:{1}:{2}'.format(column, 0, name)
+    args.append(tag)
+
+for tag in p2.tag_list():
+    column, name = tag.split(':')
+
+    if input_type == 'gd_genotype':
+        column = int(column) - 2
+
+    tag = '{0}:{1}:{2}'.format(column, 1, name)
+    args.append(tag)
+
+with open(output, 'w') as fh:
+    gd_util.run_program(prog, args, stdout=fh)
+
+################################################################################
+
+sys.exit(0)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/offspring_heterozygosity.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,68 @@
+<tool id="gd_offspring_heterozygosity" name="Pairs sequenced" version="1.0.0">
+  <description>: Offspring estimated heterozygosity of sequenced pairs</description>
+
+  <command interpreter="python">
+    #import json
+    #import base64
+    #import zlib
+    #set $ind_names = $input.dataset.metadata.individual_names
+    #set $ind_colms = $input.dataset.metadata.individual_columns
+    #set $ind_dict = dict(zip($ind_names, $ind_colms))
+    #set $ind_json = json.dumps($ind_dict, separators=(',',':'))
+    #set $ind_comp = zlib.compress($ind_json, 9)
+    #set $ind_arg = base64.b64encode($ind_comp)
+    offspring_heterozygosity.py '$input' '$input.ext' '$ind_arg' '$p1_input' '$p2_input' '$output'
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="gd_snp,gd_genotype" label="SNP dataset" />
+    <param name="p1_input" type="data" format="gd_indivs" label="First individuals dataset" />
+    <param name="p2_input" type="data" format="gd_indivs" label="Second individuals dataset" />
+  </inputs>
+
+  <outputs>
+    <data name="output" format="txt" />
+  </outputs>
+
+  <requirements>
+    <requirement type="package" version="0.1">gd_c_tools</requirement>
+  </requirements>
+
+  <!--
+  <tests>
+  </tests>
+  -->
+
+  <help>
+
+**Dataset formats**
+
+The input datasets are in gd_snp_, gd_genotype_, and gd_indivs_ formats.
+The output dataset is in text_ format.
+
+.. _gd_snp: ./static/formatHelp.html#gd_snp
+.. _gd_genotype: ./static/formatHelp.html#gd_genotype
+.. _gd_indivs: ./static/formatHelp.html#gd_indivs
+.. _text: ./static/formatHelp.html#text
+
+-----
+
+**What it does**
+
+For each pair of individuals, one from each specified set, the program
+computes the expected heterozygosity of any offspring of the pair, i.e.,
+the probability that the offspring has distinct nucleotides at a randomly
+chosen autosomal SNP.  In other words, we add the following numbers for
+each autosomal SNP where both genotypes are defined, then divide by the
+number of those SNPs:
+
+0 if the individuals are homozygous for the same nucleotide
+
+1 if the individuals are homozygous for different nucleotides
+
+1/2 otherwise (i.e., if one or both individuals are heterozygous)
+
+A SNP is ignored if one or both individuals have an undefined genotype
+(designated as -1).
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/offspring_heterozygosity_pedigree.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+
+import sys
+import gd_util
+
+from Population import Population
+
+def load_and_check_pop(file, total_pop, name):
+        p = Population()
+        p.from_population_file(file)
+        if not total_pop.is_superset(p):
+            gd_util.die('There is an individual in the {0} that is not in the SNP table'.format(name))
+        return p
+
+def append_breeders_from_file(the_list, filename, kind):
+        with open(filename) as fh:
+            for line in fh:
+                elems = line.split()
+                breeder = elems[0].rstrip('\r\n')
+                the_list.append('{0}:{1}'.format(kind, breeder))
+
+################################################################################
+
+if len(sys.argv) != 9:
+    gd_util.die('Usage')
+
+input, input_type, pedigree, ind_arg, founders, b1_input, b2_input, output = sys.argv[1:]
+
+p_total = Population()
+p_total.from_wrapped_dict(ind_arg)
+
+f1 = load_and_check_pop(founders, p_total, 'founders')
+
+################################################################################
+
+prog = 'offspring_heterozygosity2'
+
+args = [ prog ]
+args.append(input)      # a Galaxy SNP table
+args.append(pedigree)   # a pedigree, where the SNP table is for the founders
+
+for tag in f1.tag_list():
+    column, name = tag.split(':')
+    if type == 'gd_genotype':
+        column = int(column) - 2
+    tag = 'founder:{0}:{1}'.format(column, name)
+    args.append(tag)
+
+append_breeders_from_file(args, b1_input, 0)
+append_breeders_from_file(args, b2_input, 1)
+
+with open(output, 'w') as fh:
+    gd_util.run_program(prog, args, stdout=fh)
+
+################################################################################
+
+sys.exit(0)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/offspring_heterozygosity_pedigree.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,81 @@
+<tool id="gd_offspring_heterozygosity_pedigree" name="Founders sequenced" version="1.0.0">
+  <description>: Offspring estimated heterozygosity from a pedigree with sequenced founders</description>
+
+  <command interpreter="python">
+    #import json
+    #import base64
+    #import zlib
+    #set $ind_names = $input.dataset.metadata.individual_names
+    #set $ind_colms = $input.dataset.metadata.individual_columns
+    #set $ind_dict = dict(zip($ind_names, $ind_colms))
+    #set $ind_json = json.dumps($ind_dict, separators=(',',':'))
+    #set $ind_comp = zlib.compress($ind_json, 9)
+    #set $ind_arg = base64.b64encode($ind_comp)
+    offspring_heterozygosity_pedigree.py '$input' '$input.ext' '$pedigree' '$ind_arg' '$founders' '$b1_input' '$b2_input' '$output'
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="gd_snp,gd_genotype" label="SNP dataset" />
+    <param name="pedigree" type="data" format="txt" label="Pedigree dataset" />
+    <param name="founders" type="data" format="gd_indivs" label="Founders dataset" />
+    <param name="b1_input" type="data" format="txt" label="First breeders dataset" />
+    <param name="b2_input" type="data" format="txt" label="Second breeders dataset" />
+  </inputs>
+
+  <outputs>
+    <data name="output" format="txt" />
+  </outputs>
+
+  <requirements>
+    <requirement type="package" version="0.1">gd_c_tools</requirement>
+  </requirements>
+
+  <!--
+  <tests>
+  </tests>
+  -->
+
+  <help>
+
+**Dataset formats**
+
+The input datasets are in gd_snp_, gd_genotype_, text_, and gd_indivs_ formats.
+The output dataset is in text_ format.
+
+.. _gd_snp: ./static/formatHelp.html#gd_snp
+.. _gd_genotype: ./static/formatHelp.html#gd_genotype
+.. _gd_indivs: ./static/formatHelp.html#gd_indivs
+.. _text: ./static/formatHelp.html#text
+
+-----
+
+**What it does**
+
+The user provides a Galaxy SNP table (gd_snp or gd_genotype format) that
+includes the founders of a pedigree, as well as two sets of individuals.
+The pedigree is specified by a text file with one row per individual,
+containing (1) the individual's name, (2) the name of one of the
+individual's parents, which must have occurred at the start of a previous
+line, and (3) the name of the individual's other parent, which occurred at
+the start of a previous line.  For a pedigree founder, both parent names
+are replaced by &quot;-&quot;.  The founders are specified by a table in
+gd_indivs format, e.g., as produced by &quot;Specify individuals&quot;
+tool.  Every founder must have genotypes supplied in the SNP table,
+and both parents need to be given as &quot;-&quot; in the pedigree.
+Conversely, every pedigree individual whose parents are &quot;-&quot;
+must be named as a founder.
+
+The user also provides two files that specify a set of names of
+individuals.  The first word on each line names an individual (one
+line per individual); any subsequent information on a line is ignored.
+The name of each individual must appear at the start of a line in the
+pedigree.
+
+For each pair of individuals, one from each specified set, the program
+computes the expected heterozygosity of any offspring of the pair,
+i.e., the probability that the offspring has distinct nucleotides at
+a randomly chosen autosomal SNP.  A SNP is ignored if one or both
+potential parents have an ancestor with undefined genotype (designated
+as -1 in the SNP table).
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/pathway_image.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,87 @@
+<tool id="gd_pathway_image" name="Pathway Image" version="1.1.0">
+  <description>: Draw a KEGG pathway, highlighting specified gene modules</description>
+
+  <command interpreter="python">
+    mkpthwpng.py
+      "--input=${input}"
+      "--output=${output}"
+      "--KEGGpath=${pathway}"
+      "--posKEGGclmn=${kpath}"
+      "--KEGGgeneposcolmn=${kgene}"
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="tabular" label="Dataset" />
+    <param name="kgene" type="data_column" data_ref="input" label="Column with KEGG gene ID"  />
+    <param name="kpath" type="data_column" data_ref="input" numerical="false" label="Column with KEGG pathways" />
+    <param name="pathway" label="Pathway" type="select">
+      <options from_file="gd.pathways.txt">
+        <column name="value" index="1"/>
+        <column name="name" index="2"/>
+        <filter type="data_meta" ref="input" key="dbkey" column="0" separator="\t" />
+      </options>
+    </param>
+  </inputs>
+
+  <outputs>
+    <data name="output" format="png" />
+  </outputs>
+
+  <requirements>
+    <requirement type="package" version="0.2.5">mechanize</requirement>
+  </requirements>
+
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_sap" ftype="gd_sap" />
+      <param name="kpath" value="10" />
+      <param name="kgene" value="12" />
+      <param name="pathway" value="cfa05214" />
+      <output name="output" file="test_out/pathway_image/pathway_image.png" compare="sim_size" delta = "10000" />
+    </test>
+  </tests>
+
+  <help>
+
+**Dataset formats**
+
+The input and output datasets are in tabular_ format.
+The input dataset must have columns with KEGG gene ID and pathways.
+The output dataset is described below.
+(`Dataset missing?`_)
+
+.. _tabular: ./static/formatHelp.html#tab
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+This tool produces an image of a KEGG pathway, highlighting (in red) the
+modules representing genes in the input dataset.  Click here_ for help
+with reading the pathway map.
+
+NOTE:  a given gene can
+be assigned to multiple modules, and different genes can be assigned to
+the same module.
+
+.. _here: http://www.genome.jp/kegg/document/help_pathway.html
+
+-----
+
+**Example**
+
+- input::
+
+   476153  probably damaging       cfa00230=Purine metabolism.cfa00500=Starch and sucrose metabolism.cfa00740=Riboflavin metabolism.cfa00760=Nicotinate and nicotinamide metabolism.cfa00770=Pantothenate and CoA biosynthesis.cfa01100=Metabolic pathways
+   483960  probably damaging       N
+   610160  possibly damaging       N
+   403657  benign  cfa04010=MAPK signaling pathway.cfa04012=ErbB signaling pathway.cfa04060=Cytokine-cytokine receptor interaction.cfa04144=Endocytosis.cfa04510=Focal adhesion.cfa04540=Gap junction.cfa04810=Regulation of actin cytoskeleton.cfa05160=Hepatitis C.cfa05200=Pathways in cancer.cfa05212=Pancreatic cancer.cfa05213=Endometrial cancer.cfa05214=Glioma.cfa05215=Prostate cancer.cfa05218=Melanoma.cfa05219=Bladder cancer.cfa05223=Non-small cell lung cancer
+   etc.
+
+output showing pathway cfa05214:
+
+.. image:: $PATH_TO_IMAGES/gd_pathway_image.png
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/pca.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,247 @@
+#!/usr/bin/env python
+
+import gd_util
+import os
+import re
+import shutil
+import sys
+from BeautifulSoup import BeautifulSoup
+import gd_composite
+
+################################################################################
+
+def do_ped2geno(input, output):
+    lines = []
+    with open(input) as fh:
+        for line in fh:
+            line = line.rstrip('\r\n')
+            lines.append(line.split())
+
+    pair_map = {
+        '0':{ '0':'9', '1':'9', '2':'9' },
+        '1':{ '0':'1', '1':'2', '2':'1' },
+        '2':{ '0':'1', '1':'1', '2':'0' }
+    }
+    with open(output, 'w') as ofh:
+        for a_idx in xrange(6, len(lines[0]), 2):
+            b_idx = a_idx + 1
+            print >> ofh, ''.join(map(lambda line: pair_map[line[a_idx]][line[b_idx]], lines))
+
+def do_map2snp(input, output):
+    with open(output, 'w') as ofh:
+        with open(input) as fh:
+            for line in fh:
+                elems = line.split()
+                print >> ofh, '  {0} 11 0.002 2000 A T'.format(elems[1])
+
+def make_ind_file(ind_file, input):
+    pops = []
+    name_map = []
+    name_idx = 0
+
+    ofh = open(ind_file, 'w')
+
+    with open(input) as fh:
+        soup = BeautifulSoup(fh)
+        misc = soup.find('div', {'id': 'gd_misc'})
+        populations = misc('ul')[0]
+
+        i = 0
+        for entry in populations:
+            if i % 2 == 1:
+                population_name = entry.contents[0].encode('utf8').strip().replace(' ', '_')
+                pops.append(population_name)
+                individuals = entry.ol('li')
+                for individual in individuals:
+                    individual_name = individual.string.encode('utf8').strip()
+                    name_map.append(individual_name)
+                    print >> ofh, 'ind_%s' % name_idx, 'M', population_name
+                    name_idx += 1
+            i += 1
+
+    ofh.close()
+    return pops, name_map
+
+def make_par_file(par_file, geno_file, snp_file, ind_file, evec_file, eval_file):
+    with open(par_file, 'w') as fh:
+        print >> fh, 'genotypename: {0}'.format(geno_file)
+        print >> fh, 'snpname: {0}'.format(snp_file)
+        print >> fh, 'indivname: {0}'.format(ind_file)
+        print >> fh, 'evecoutname: {0}'.format(evec_file)
+        print >> fh, 'evaloutname: {0}'.format(eval_file)
+        print >> fh, 'altnormstyle: NO'
+        print >> fh, 'numoutevec: 2'
+
+def do_smartpca(par_file):
+    prog = 'smartpca'
+
+    args = [ prog ]
+    args.append('-p')
+    args.append(par_file)
+
+    stdoutdata, stderrdata = gd_util.run_program(prog, args)
+
+    stats = []
+
+    save_line = False
+    for line in stdoutdata.split('\n'):
+        if line.startswith(('## Average divergence', '## Anova statistics', '## Statistical significance')):
+            stats.append('')
+            save_line = True
+        if line.strip() == '':
+            save_line = False
+        if save_line:
+            stats.append(line)
+
+    return '\n'.join(stats[1:])
+
+def do_ploteig(evec_file, population_names):
+    prog = 'gd_ploteig'
+
+    args = [ prog ]
+    args.append('-i')
+    args.append(evec_file)
+    args.append('-c')
+    args.append('1:2')
+    args.append('-p')
+    args.append(':'.join(population_names))
+    args.append('-x')
+
+    gd_util.run_program(prog, args)
+
+def do_eval2pct(eval_file, explained_file):
+    prog = 'eval2pct'
+
+    args = [ prog ]
+    args.append(eval_file)
+
+    with open(explained_file, 'w') as fh:
+        gd_util.run_program(prog, args, stdout=fh)
+
+def do_coords2admix(coords_file):
+    prog = 'coords2admix'
+
+    args = [ prog ]
+    args.append(coords_file)
+
+    with open('fake', 'w') as fh:
+        gd_util.run_program(prog, args, stdout=fh)
+
+    shutil.copy2('fake', coords_file)
+
+ind_regex = re.compile('ind_([0-9]+)')
+
+def fix_names(name_map, files):
+    for file in files:
+        tmp_filename = '%s.tmp' % file
+        with open(tmp_filename, 'w') as ofh:
+            with open(file) as fh:
+                for line in fh:
+                    line = line.rstrip('\r\n')
+                    match = ind_regex.search(line)
+                    if match:
+                        idx = int(match.group(1))
+                        old = 'ind_%s' % idx
+                        new = name_map[idx].replace(' ', '_')
+                        line = line.replace(old, new)
+                    print >> ofh, line
+
+        shutil.copy2(tmp_filename, file)
+        os.unlink(tmp_filename)
+
+################################################################################
+
+# Driver: builds the smartpca inputs from a gd_ped composite dataset, runs the
+# external programs, and renders an HTML summary page.
+# Exactly four arguments are expected after the script name.
+if len(sys.argv) != 5:
+    gd_util.die('Usage')
+
+input, input_files_path, output, output_files_path = sys.argv[1:5]
+gd_util.mkdir_p(output_files_path)
+
+################################################################################
+
+# admix.ped (from the input's files directory) -> admix.geno
+ped_file = os.path.join(input_files_path, 'admix.ped')
+geno_file = os.path.join(output_files_path, 'admix.geno')
+do_ped2geno(ped_file, geno_file)
+
+################################################################################
+
+# admix.map (from the input's files directory) -> admix.snp
+map_file = os.path.join(input_files_path, 'admix.map')
+snp_file = os.path.join(output_files_path, 'admix.snp')
+do_map2snp(map_file, snp_file)
+
+################################################################################
+
+# The individuals file is derived from the HTML of the input dataset itself.
+ind_file = os.path.join(output_files_path, 'admix.ind')
+population_names, name_map = make_ind_file(ind_file, input)
+
+################################################################################
+
+# Parameter file tying the generated inputs to the requested output names.
+par_file = os.path.join(output_files_path, 'par.admix')
+evec_file = os.path.join(output_files_path, 'coordinates.txt')
+eval_file = os.path.join(output_files_path, 'admix.eval')
+make_par_file(par_file, geno_file, snp_file, ind_file, evec_file, eval_file)
+
+################################################################################
+
+# Run smartpca, then put the real individual names back in place of the
+# 'ind_<n>' placeholders in the individuals and coordinates files.
+smartpca_stats = do_smartpca(par_file)
+fix_names(name_map, [ind_file, evec_file])
+
+################################################################################
+
+# The plot is picked up under a relative name and moved to PCA.pdf in the
+# output directory.
+# NOTE(review): this assumes gd_ploteig writes the PDF to the current working
+# directory -- confirm.
+do_ploteig(evec_file, population_names)
+plot_file = 'coordinates.txt.1:2.{0}.pdf'.format(':'.join(population_names))
+output_plot_file = os.path.join(output_files_path, 'PCA.pdf')
+shutil.copy2(plot_file, output_plot_file)
+os.unlink(plot_file)
+
+################################################################################
+
+# The eigenvalue file is only needed to produce explained.txt.
+do_eval2pct(eval_file, os.path.join(output_files_path, 'explained.txt'))
+os.unlink(eval_file)
+
+################################################################################
+
+# Rewrites coordinates.txt in place with the output of coords2admix.
+do_coords2admix(evec_file)
+
+################################################################################
+
+# Assemble the HTML page describing the composite dataset.
+info_page = gd_composite.InfoPage()
+info_page.set_title('PCA Galaxy Composite Dataset')
+
+display_file = gd_composite.DisplayFile()
+display_value = gd_composite.DisplayValue()
+
+out_pdf = gd_composite.Parameter(name='PCA.pdf', value='PCA.pdf', display_type=display_file)
+out_evec = gd_composite.Parameter(name='coordinates.txt', value='coordinates.txt', display_type=display_file)
+out_explained = gd_composite.Parameter(name='explained.txt', value='explained.txt', display_type=display_file)
+
+# Remove the .ps and .xtxt files that share the plot's name prefix from the
+# output directory.
+# NOTE(review): presumably these are by-products of gd_ploteig -- confirm.
+evec_prefix = 'coordinates.txt.1:2.{0}'.format(':'.join(population_names))
+ps_file = '{0}.ps'.format(evec_prefix)
+xtxt_file = '{0}.xtxt'.format(evec_prefix)
+
+os.unlink(os.path.join(output_files_path, ps_file))
+os.unlink(os.path.join(output_files_path, xtxt_file))
+
+info_page.add_output_parameter(out_pdf)
+info_page.add_output_parameter(out_evec)
+info_page.add_output_parameter(out_explained)
+
+in_admix = gd_composite.Parameter(name='par.admix', value='par.admix', display_type=display_file)
+in_geno = gd_composite.Parameter(name='admix.geno', value='admix.geno', display_type=display_file)
+in_snp = gd_composite.Parameter(name='admix.snp', value='admix.snp', display_type=display_file)
+in_ind = gd_composite.Parameter(name='admix.ind', value='admix.ind', display_type=display_file)
+
+info_page.add_input_parameter(in_admix)
+info_page.add_input_parameter(in_geno)
+info_page.add_input_parameter(in_snp)
+info_page.add_input_parameter(in_ind)
+
+# The statistics extracted from smartpca's stdout are embedded verbatim.
+misc_stats = gd_composite.Parameter(description='Stats<p/><pre>\n{0}\n</pre>'.format(smartpca_stats), display_type=display_value)
+
+info_page.add_misc(misc_stats)
+
+# The rendered page is the primary output dataset.
+with open (output, 'w') as ofh:
+    print >> ofh, info_page.render()
+
+sys.exit(0)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/pca.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,122 @@
+<tool id="gd_pca" name="PCA" version="1.0.0">
+  <description>: Principal Components Analysis of genotype data</description>
+
+  <command interpreter="python">
+    pca.py '$input' '$input.extra_files_path' '$output' '$output.files_path'
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="gd_ped" label="Dataset" />
+  </inputs>
+
+  <outputs>
+    <data name="output" format="html" />
+  </outputs>
+
+  <requirements>
+    <requirement type="package" version="5.0.1">eigensoft</requirement>
+    <requirement type="package" version="0.1">gd_c_tools</requirement>
+    <requirement type="package" version="3.2.1">beautifulsoup</requirement>
+  </requirements>
+
+  <!--
+  <tests>
+    <test>
+      <param name="input" value="fake" ftype="gd_ped" >
+        <metadata name="base_name" value="admix" />
+        <composite_data value="test_out/prepare_population_structure/prepare_population_structure.html" />
+        <composite_data value="test_out/prepare_population_structure/admix.ped" />
+        <composite_data value="test_out/prepare_population_structure/admix.map" />
+        <edit_attributes type="name" value="fake" />
+      </param>
+
+      <output name="output" file="test_out/pca/pca.html" ftype="html" compare="diff" lines_diff="2">
+        <extra_files type="file" name="admix.geno" value="test_out/pca/admix.geno" />
+        <extra_files type="file" name="admix.gd_indivs" value="test_out/pca/admix.gd_indivs" />
+        <extra_files type="file" name="admix.gd_snp" value="test_out/pca/admix.gd_snp" />
+        <extra_files type="file" name="coordinates.txt" value="test_out/pca/coordinates.txt" />
+        <extra_files type="file" name="explained.txt" value="test_out/pca/explained.txt" />
+        <extra_files type="file" name="par.admix" value="test_out/pca/par.admix" compare="diff" lines_diff="10" />
+        <extra_files type="file" name="PCA.pdf" value="test_out/pca/PCA.pdf" compare="sim_size" delta = "1000" />
+      </output>
+
+    </test>
+  </tests>
+  -->
+
+  <help>
+
+**Dataset formats**
+
+The input dataset is in gd_ped_ format.
+The output dataset is html_ with links to a pdf for a graphical output and
+text files.  (`Dataset missing?`_)
+
+.. _gd_ped: ./static/formatHelp.html#gd_ped
+.. _html: ./static/formatHelp.html#html
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+The user selects a gd_ped dataset generated by the Prepare Input tool.
+The PCA tool runs a
+Principal Components Analysis on the input genotype data and constructs
+a plot of the top two principal components. It also reports the
+following estimates of the statistical significance of the analysis.
+
+1. Average divergence between each pair of populations.  Specifically,
+from the covariance matrix X whose eigenvectors were computed, we can
+compute a "distance", d, for each pair of individuals (i,j): d(i,j) =
+X(i,i) + X(j,j) - 2X(i,j).  For each pair of populations (a,b) now
+define an average distance: D(a,b) = \sum d(i,j) (in pop a, in pop b)
+/ (\|pop a\| * \|pop b\|).  We then normalize D so that the diagonal
+has mean 1 and report it.
+
+2. Anova statistics for population differences along each
+eigenvector. For each eigenvector, a P-value for statistical
+significance of differences between each pair of populations along
+that eigenvector is printed.  +++ is used to highlight P-values less
+than 1e-06.  \*\*\* is used to highlight P-values between 1e-06 and
+1e-03.  If there are more than 2 populations, then an overall P-value
+is also printed for that eigenvector, as are the populations with
+minimum (minv) and maximum (maxv) eigenvector coordinate. [If there is
+only 1 population, no Anova statistics are printed.]
+
+3. Statistical significance of differences between populations. For
+each pair of populations, the above Anova statistics are summed across
+eigenvectors. The result is approximately chisq with d.o.f. equal to
+the number of eigenvectors. The chisq statistic and its p-value are
+printed. [If there is only 1 population, no statistics are printed.]
+
+We post-process the output of the PCA tool to estimate "admixture
+fractions".  For this, we take three populations at a time and
+determine each one's average point in the PCA plot (by separately
+averaging first and second coordinates).  For each combination of two
+center points, modeling two ancestral populations, we try to model the
+third central point as having a certain fraction, r, of its SNP
+genotypes from the second ancestral population and the remainder from
+the first ancestral population, where we estimate r.  The output file
+"coordinates.txt" then contains pairs of lines like
+
+projection along chord Population1 -> Population2
+  Population3: 0.12345
+
+where the number (in this case 0.12345) is the estimate of r.
+Computations with simulated data suggest that the true r is
+systematically underestimated, perhaps giving roughly 0.6 times r.
+
+-----
+
+**Acknowledgments**
+
+We use the programs "smartpca" and "ploteig" downloaded from
+
+http://genepath.med.harvard.edu/~reich/Software.htm
+
+and described in the paper "Population structure and eigenanalysis"
+by Nick Patterson, Alkes L. Price, and David Reich, PLoS Genetics, 2 (2006), e190.
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/phylogenetic_tree.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,197 @@
+#!/usr/bin/env python
+
+import gd_util
+import os
+import sys
+from Population import Population
+import gd_composite
+
+################################################################################
+
+if len(sys.argv) != 12:
+    gd_util.die('Usage')
+
+input, output, extra_files_path, input_type, data_source_arg, minimum_coverage, minimum_quality, p1_input, dbkey, draw_tree_options, ind_arg = sys.argv[1:]
+
+if input_type == 'gd_snp':
+    if data_source_arg == 'sequence_coverage':
+        data_source = 0
+    elif data_source_arg == 'estimated_genotype':
+        data_source = 1
+    else:
+        gd_util.die('Unsupported data_source: {0}'.format(data_source_arg))
+elif input_type == 'gd_genotype':
+        data_source = 1
+        minimum_coverage = 0
+        minimum_quality = 0
+else:
+    gd_util.die('Unsupported input_type:: {0}'.format(input_type))
+
+# note: TEST THIS
+if dbkey in ['', '?', 'None']:
+    dbkey = 'none'
+
+p_total = Population()
+p_total.from_wrapped_dict(ind_arg)
+
+if p1_input == "all_individuals":
+    tags = p_total.tag_list()
+else:
+    p1 = Population()
+    p1.from_population_file(p1_input)
+    if not p_total.is_superset(p1):
+        gd_util.die('There is an individual in the population that is not in the SNP table')
+    tags = p1.tag_list()
+
+################################################################################
+
+gd_util.mkdir_p(extra_files_path)
+phylip_outfile = os.path.join(extra_files_path, 'distance_matrix.phylip')
+newick_outfile = os.path.join(extra_files_path, 'phylogenetic_tree.newick')
+ps_outfile = 'tree.ps'
+pdf_outfile = os.path.join(extra_files_path, 'tree.pdf')
+informative_snp_file = os.path.join(extra_files_path, 'informative_snps.txt')
+mega_distance_matrix_file = os.path.join(extra_files_path, 'mega_distance_matrix.txt')
+
+################################################################################
+
+prog = 'dist_mat'
+
+args = [ prog ]
+args.append(input)
+args.append(minimum_coverage)
+args.append(minimum_quality)
+args.append(dbkey)
+args.append(data_source)
+args.append(informative_snp_file)
+args.append(mega_distance_matrix_file)
+
+for tag in tags:
+    if input_type == 'gd_genotype':
+        column, name = tag.split(':')
+        tag = '{0}:{1}'.format(int(column) - 2, name)
+    args.append(tag)
+
+with open(phylip_outfile, 'w') as fh:
+    gd_util.run_program(prog, args, stdout=fh)
+
+################################################################################
+
+prog = 'quicktree'
+
+args = [ prog ]
+args.append('-in')
+args.append('m')
+args.append('-out')
+args.append('t')
+args.append(phylip_outfile)
+
+with open(newick_outfile, 'w') as fh:
+    gd_util.run_program(prog, args, stdout=fh)
+
+################################################################################
+
+prog = 'draw_tree'
+
+args = [ prog ]
+
+if draw_tree_options:
+    args.append(draw_tree_options)
+
+args.append(newick_outfile)
+
+with open(ps_outfile, 'w') as fh:
+    gd_util.run_program(prog, args, stdout=fh)
+
+################################################################################
+
+prog = 'ps2pdf'
+
+args = [ prog ]
+args.append('-dPDFSETTINGS=/prepress')
+args.append(ps_outfile)
+args.append(pdf_outfile)
+
+gd_util.run_program(prog, args)
+
+################################################################################
+
+info_page = gd_composite.InfoPage()
+info_page.set_title('Phylogenetic tree Galaxy Composite Dataset')
+
+display_file = gd_composite.DisplayFile()
+display_value = gd_composite.DisplayValue()
+
+out_pdf = gd_composite.Parameter(name='tree.pdf', value='tree.pdf', display_type=display_file)
+out_newick = gd_composite.Parameter(value='phylogenetic_tree.newick', name='phylogenetic tree (newick)', display_type=display_file)
+out_phylip = gd_composite.Parameter(value='distance_matrix.phylip', name='Phylip distance matrix', display_type=display_file)
+out_mega = gd_composite.Parameter(value='mega_distance_matrix.txt', name='Mega distance matrix', display_type=display_file)
+out_snps = gd_composite.Parameter(value='informative_snps.txt', name='informative SNPs', display_type=display_file)
+
+info_page.add_output_parameter(out_pdf)
+info_page.add_output_parameter(out_newick)
+info_page.add_output_parameter(out_phylip)
+info_page.add_output_parameter(out_mega)
+info_page.add_output_parameter(out_snps)
+
+in_min_cov = gd_composite.Parameter(description='Minimum coverage', value=minimum_coverage, display_type=display_value)
+in_min_qual = gd_composite.Parameter(description='Minimum quality', value=minimum_quality, display_type=display_value)
+
+include_ref_value = 'no'
+if dbkey != 'none':
+    include_ref_value = 'yes'
+
+in_include_ref = gd_composite.Parameter(description='Include reference sequence', value=include_ref_value, display_type=display_value)
+
+if data_source == 0:
+    data_source_value = 'sequence coverage'
+elif data_source == 1:
+    data_source_value = 'estimated genotype'
+
+in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value)
+
+branch_type_value = 'square'
+if 'd' in draw_tree_options:
+    branch_type_value = 'diagonal'
+
+in_branch_type = gd_composite.Parameter(description='Branch type', value=branch_type_value, display_type=display_value)
+
+branch_scale_value = 'yes'
+if 's' in draw_tree_options:
+    branch_scale_value = 'no'
+
+in_branch_scale = gd_composite.Parameter(description='Draw branches to scale', value=branch_scale_value, display_type=display_value)
+
+branch_length_value = 'yes'
+if 'b' in draw_tree_options:
+    branch_length_value = 'no'
+
+in_branch_length = gd_composite.Parameter(description='Show branch lengths', value=branch_length_value, display_type=display_value)
+
+tree_layout_value = 'horizontal'
+if 'v' in draw_tree_options:
+    tree_layout_value = 'vertical'
+
+in_tree_layout = gd_composite.Parameter(description='Tree layout', value=tree_layout_value, display_type=display_value)
+
+info_page.add_input_parameter(in_min_cov)
+info_page.add_input_parameter(in_min_qual)
+info_page.add_input_parameter(in_include_ref)
+info_page.add_input_parameter(in_data_source)
+info_page.add_input_parameter(in_branch_type)
+info_page.add_input_parameter(in_branch_scale)
+info_page.add_input_parameter(in_branch_length)
+info_page.add_input_parameter(in_tree_layout)
+
+misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList())
+
+info_page.add_misc(misc_individuals)
+
+
+with open(output, 'w') as ofh:
+    print >> ofh, info_page.render()
+
+################################################################################
+
+sys.exit(0)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/phylogenetic_tree.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,231 @@
+<tool id="gd_phylogenetic_tree" name="Phylogenetic Tree" version="1.1.0">
+  <description>: Show genetic relationships among individuals</description>
+
+  <command interpreter="python">
+    #import json
+    #import base64
+    #import zlib
+    #set $ind_names = $input.dataset.metadata.individual_names
+    #set $ind_colms = $input.dataset.metadata.individual_columns
+    #set $ind_dict = dict(zip($ind_names, $ind_colms))
+    #set $ind_json = json.dumps($ind_dict, separators=(',',':'))
+    #set $ind_comp = zlib.compress($ind_json, 9)
+    #set $ind_arg = base64.b64encode($ind_comp)
+    phylogenetic_tree.py '$input' '$output' '$output.files_path'
+    #if $input_type.choice == '0'
+      'gd_snp'
+      #if $input_type.data_source.choice == '0'
+        'sequence_coverage'
+        '$input_type.data_source.minimum_coverage'
+        '$input_type.data_source.minimum_quality'
+      #else if $input_type.data_source.choice == '1'
+        'estimated_genotype' '0' '0'
+      #end if
+    #else if $input_type.choice == '1'
+      'gd_genotype' 'estimated_genotype' '0' '0'
+    #end if
+    #if $individuals.choice == '0'
+      'all_individuals'
+    #else if $individuals.choice == '1'
+      '$individuals.p1_input'
+    #end if
+	#if ((str($input.metadata.scaffold) == str($input.metadata.ref)) and (str($input.metadata.pos) == str($input.metadata.rPos))) or (str($include_reference) == '0')
+        'none'
+    #else
+        '$input.metadata.dbkey'
+    #end if
+    #set $draw_tree_options = ''.join(str(x) for x in [$branch_style, $scale_style, $length_style, $layout_style])
+    #if $draw_tree_options == ''
+        ''
+    #else
+        '-$draw_tree_options'
+    #end if
+    '$ind_arg'
+  </command>
+
+  <inputs>
+    <conditional name="input_type">
+      <param name="choice" type="select" format="integer" label="Input format">
+        <option value="0" selected="true">gd_snp</option>
+        <option value="1">gd_genotype</option>
+      </param>
+      <when value="0">
+        <param name="input" type="data" format="gd_snp" label="SNP dataset" />
+
+        <conditional name="data_source">
+          <param name="choice" type="select" format="integer" label="Distance metric">
+            <option value="0">sequence coverage</option>
+            <option value="1" selected="true">estimated genotype</option>
+          </param>
+          <when value="0">
+            <param name="minimum_coverage" type="integer" min="0" value="0" label="Minimum SNP coverage" />
+            <param name="minimum_quality" type="integer" min="0" value="0" label="Minimum SNP quality"
+                   help="Note: minimum coverage and minimum quality cannot both be 0" />
+          </when>
+          <when value="1"/>
+        </conditional>
+      </when>
+      <when value="1">
+        <param name="input" type="data" format="gd_genotype" label="Genotype dataset" />
+      </when>
+    </conditional>
+
+    <conditional name="individuals">
+      <param name="choice" type="select" label="Compute for">
+        <option value="0" selected="true">All individuals</option>
+        <option value="1">Individuals in a population</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <param name="p1_input" type="data" format="gd_indivs" label="Population individuals" />
+      </when>
+    </conditional>
+
+    <param name="include_reference" type="select" format="integer" label="Include reference sequence">
+      <option value="1" selected="true">Yes</option>
+      <option value="0">No</option>
+    </param>
+
+    <param name="branch_style" type="select" display="radio">
+      <label>Branch type</label>
+      <option value="" selected="true">square</option>
+      <option value="d">diagonal</option>
+    </param>
+
+    <param name="scale_style" type="select" display="radio">
+      <label>Draw branches to scale</label>
+      <option value="" selected="true">yes</option>
+      <option value="s">no</option>
+    </param>
+
+    <param name="length_style" type="select" display="radio">
+      <label>Show branch lengths</label>
+      <option value="" selected="true">yes</option>
+      <option value="b">no</option>
+    </param>
+
+    <param name="layout_style" type="select" display="radio">
+      <label>Tree layout</label>
+      <option value="" selected="true">horizontal</option>
+      <option value="v">vertical</option>
+    </param>
+  </inputs>
+
+  <outputs>
+    <data name="output" format="html" />
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
+      <param name="choice" value="0" />
+      <param name="minimum_coverage" value="3" />
+      <param name="minimum_quality" value="30" />
+      <param name="data_source" value="0" />
+      <param name="branch_style" value="" />
+      <param name="scale_style" value="" />
+      <param name="length_style" value="" />
+      <param name="layout_style" value="" />
+      <output name="output" file="test_out/phylogenetic_tree/phylogenetic_tree.html" ftype="html" compare="diff" lines_diff="2">
+        <extra_files type="file" name="distance_matrix.phylip" value="test_out/phylogenetic_tree/distance_matrix.phylip" />
+        <extra_files type="file" name="informative_snps.txt" value="test_out/phylogenetic_tree/informative_snps.txt" />
+        <extra_files type="file" name="mega_distance_matrix.txt" value="test_out/phylogenetic_tree/mega_distance_matrix.txt" />
+        <extra_files type="file" name="phylogenetic_tree.newick" value="test_out/phylogenetic_tree/phylogenetic_tree.newick" />
+        <extra_files type="file" name="tree.pdf" value="test_out/phylogenetic_tree/tree.pdf" compare="sim_size" delta = "1000"/>
+      </output>
+    </test>
+  </tests>
+
+  <requirements>
+    <requirement type="package" version="1.3">phast</requirement>
+    <requirement type="package" version="1.1">quicktree</requirement>
+    <requirement type="package" version="0.1">gd_c_tools</requirement>
+  </requirements>
+
+
+  <help>
+
+**Dataset formats**
+
+The input dataset is in gd_snp_ or gd_genotype_ format.
+The output is a composite dataset, containing the tree in both text (Newick_)
+and PDF formats, as well as supplemental text information.
+(`Dataset missing?`_)
+
+.. _gd_snp: ./static/formatHelp.html#gd_snp
+.. _gd_genotype: ./static/formatHelp.html#gd_genotype
+.. _Newick: http://evolution.genetics.washington.edu/phylip/newicktree.html
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+This tool uses a gd_snp dataset to determine a kind of "genetic distance"
+between each pair of individuals.  That information is used to
+produce a tree-shaped figure that depicts how the individuals are related,
+both as text files and as a diagram.
+The text files include a common tree format, Newick, as well as distance
+matrices and counts of informative SNPs for each pairwise comparison.
+The informative SNPs can be used as a guide to how reliable the tree is.
+
+The input parameters are:
+
+SNP dataset
+   A table of SNPs for various individuals, in gd_snp format.
+
+Individuals
+   By default all individuals are included in the analysis, but this can
+   optionally be restricted to a subset that has been defined using the
+   Specify Individuals tool.
+
+Minimum SNP coverage
+   For each pair of individuals, the tool looks for informative SNPs, i.e.,
+   where the sequence data for both individuals is adequate.  Specifying,
+   say, 7 for this option instructs the tool to consider only SNPs with
+   at least 7 reads in each of the two individuals (regardless of the
+   alleles) when estimating their genetic distance.
+
+Minimum SNP quality
+   Specifying, say, 37 for this option instructs the tool to consider
+   only SNPs with a quality score of at least 37 in both individuals
+   when estimating their genetic distance.
+
+Include reference sequence
+   For gd_snp datasets containing columns for a reference sequence, the
+   user can ask that the reference be indicated in the tree, to help with
+   rooting it.  If the dataset has no reference columns, this option has
+   no effect.
+
+Distance metric
+   The genetic distance between two individuals at a given SNP can
+   be estimated two ways.  One method is to use the absolute value of the
+   difference in the frequency of the first allele (or equivalently, the
+   second allele).  For instance, if the first individual has 5 reads of
+   each allele and the second individual has respectively 3 and 6 reads,
+   then the frequencies are 1/2 and 1/3, giving a distance 1/6 at that
+   SNP.  The other approach is to use the genotype calls to estimate
+   the difference in the number of occurrences of the first allele.
+   For instance, if the two genotypes are 2 and 1, i.e., the individuals
+   are estimated to have respectively 2 and 1 occurrences of the first
+   allele at this location, then the distance is 1 (the absolute value
+   of the difference of the two numbers).
+
+Output options
+   The final four options apply mostly to the graphical drawing of the
+   tree, except that the branch lengths are also added to the Newick text
+   file.
+
+-----
+
+**Acknowledgments**
+
+To convert the distance matrix to a Newick-formatted tree, we use the
+QuickTree program from
+http://www.sanger.ac.uk/resources/software/quicktree/ .
+
+To make the diagram we use draw_tree, available at
+http://compgen.bscb.cornell.edu/phast/ .
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/population_structure.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,121 @@
+#!/usr/bin/env python
+
+import errno
+import os
+import shutil
+import subprocess
+import sys
+from BeautifulSoup import BeautifulSoup
+import gd_composite
+
+################################################################################
+
+def run_admixture(ped_file, populations):
+    prog = 'admixture'
+
+    args = []
+    args.append(prog)
+    args.append(input_ped_file)
+    args.append(populations)
+
+    #print "args:", ' '.join(args)
+    ofh = open('/dev/null', 'w')
+    p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=ofh, stderr=sys.stderr)
+    rc = p.wait()
+    ofh.close()
+
+def run_r(input_file, output_file, populations):
+    prog = 'R'
+
+    args = []
+    args.append(prog)
+    args.append('--vanilla')
+    args.append('--quiet')
+    args.append('--args')
+    args.append(input_file)
+    args.append(output_file)
+    args.append(populations)
+
+    _realpath = os.path.realpath(__file__)
+    _script_dir = os.path.dirname(_realpath)
+    r_script_file = os.path.join(_script_dir, 'population_structure.r')
+
+    ifh = open(r_script_file)
+    ofh = open('/dev/null', 'w')
+    p = subprocess.Popen(args, bufsize=-1, stdin=ifh, stdout=ofh, stderr=None)
+    rc = p.wait()
+    ifh.close()
+    ofh.close()
+
+def mkdir_p(path):
+    try:
+        os.makedirs(path)
+    except OSError, e:
+        if e.errno <> errno.EEXIST:
+            raise
+
+def get_populations(input):
+    pops = []
+    pop_names = {}
+
+    with open(input) as fh:
+        soup = BeautifulSoup(fh)
+        misc = soup.find('div', {'id': 'gd_misc'})
+
+        return 'Populations\n{0}'.format(misc('ul')[0])
+
+################################################################################
+
+if len(sys.argv) != 6:
+    print >> sys.stderr, "Usage"
+    sys.exit(1)
+
+input_html_file, input_ped_file, output_file, extra_files_path, populations = sys.argv[1:6]
+populations_html = get_populations(input_html_file)
+
+run_admixture(input_ped_file, populations)
+
+ped_base = os.path.basename(input_ped_file)
+if ped_base.endswith('.ped'):
+    ped_base = ped_base[:-4]
+
+p_file = '%s.%s.P' % (ped_base, populations)
+q_file = '%s.%s.Q' % (ped_base, populations)
+
+mkdir_p(extra_files_path)
+numeric_output_file = os.path.join(extra_files_path, 'numeric.txt')
+shutil.copy2(q_file, numeric_output_file)
+os.remove(p_file)
+os.remove(q_file)
+
+graphical_output_file = os.path.join(extra_files_path, 'graphical.pdf')
+run_r(numeric_output_file, graphical_output_file, populations)
+
+################################################################################
+
+info_page = gd_composite.InfoPage()
+info_page.set_title('Population structure Galaxy Composite Dataset')
+
+display_file = gd_composite.DisplayFile()
+display_value = gd_composite.DisplayValue()
+
+out_pdf = gd_composite.Parameter(name='graphical.pdf', value='graphical.pdf', display_type=display_file)
+out_txt = gd_composite.Parameter(name='numeric.txt', value='numeric.txt', display_type=display_file)
+
+info_page.add_output_parameter(out_pdf)
+info_page.add_output_parameter(out_txt)
+
+in_pops = gd_composite.Parameter(description='Number of populations', value=populations, display_type=display_value)
+
+info_page.add_input_parameter(in_pops)
+
+misc_pops = gd_composite.Parameter(description=populations_html, display_type=display_value)
+
+info_page.add_misc(misc_pops)
+
+
+with open (output_file, 'w') as ofh:
+    print >> ofh, info_page.render()
+
+
+sys.exit(0)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/population_structure.r	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,19 @@
+library(RColorBrewer)
+
+# Arguments after --args: <Q file> <output PDF> <number of populations>
+args = commandArgs(trailingOnly=TRUE)
+q_file = args[[1]]
+output_file = args[[2]]
+# Command-line arguments arrive as character strings.  Convert the count to
+# an integer so the range test below compares numbers; comparing the string
+# with 3 and 12 would be a lexical comparison.
+populations = as.integer(args[[3]])
+
+tbl <- read.table(q_file)
+
+# The 'Paired' palette is used for 3 to 12 populations, rainbow() otherwise.
+if ( populations >= 3 && populations <= 12 ) {
+    colors = brewer.pal(populations, 'Paired')
+} else {
+    colors = rainbow(populations)
+}
+
+# One bar per row of the table, coloured by column.
+pdf(file=output_file, onefile=TRUE, width=7, height=3)
+barplot(t(as.matrix(tbl)), col=colors, xlab="Individual #", ylab="Ancestry", border=NA)
+
+dev.off()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/population_structure.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,76 @@
+<tool id="gd_population_structure" name="Ancestry" version="1.0.0">
+  <description>: Characterize ancestries w.r.t. inferred ancestral populations</description>
+
+  <command interpreter="python">
+    population_structure.py "$input" "${input.extra_files_path}/admix.ped" "$output" "$output.files_path" "$populations"
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="gd_ped" label="Dataset" />
+    <param name="populations" type="integer" min="1" value="2" label="Number of populations" />
+  </inputs>
+
+  <outputs>
+    <data name="output" format="html" />
+  </outputs>
+
+  <requirements>
+    <requirement type="package" version="3.2.1">beautifulsoup</requirement>
+  </requirements>
+
+  <!--
+  <tests>
+    <test>
+      <param name="input" value="fake" ftype="gd_ped" >
+        <metadata name="base_name" value="admix" />
+        <composite_data value="test_out/prepare_population_structure/prepare_population_structure.html" />
+        <composite_data value="test_out/prepare_population_structure/admix.ped" />
+        <composite_data value="test_out/prepare_population_structure/admix.map" />
+        <edit_attributes type="name" value="fake" />
+      </param>
+      <param name="populations" value="2" />
+
+      <output name="output" file="test_out/population_structure/population_structure.html" ftype="html" compare="diff" lines_diff="2">
+        <extra_files type="file" name="numeric.txt" value="test_out/population_structure/numeric.txt" />
+        <extra_files type="file" name="graphical.pdf" value="test_out/population_structure/graphical.pdf" compare="sim_size" delta="1000" />
+      </output>
+    </test>
+  </tests>
+  -->
+
+
+  <help>
+
+**Dataset formats**
+
+The input dataset is in gd_ped_ format.
+The output dataset is a composite dataset containing a graph and text.
+(`Dataset missing?`_)
+
+.. _gd_ped: ./static/formatHelp.html#gd_ped
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+The user selects a gd_ped dataset generated by the Prepare Input tool,
+and specifies a number, K, of ancestral
+populations.  The tool estimates the proportion of each individual's ancestry
+coming from each ancestral population.  The proportions are shown both as
+numbers and graphically.
+
+-----
+
+**Acknowledgments**
+
+We use the program "Admixture", downloaded from
+
+http://www.genetics.ucla.edu/software/admixture/
+
+and described in the paper "Fast model-based estimation of ancestry in
+unrelated individuals" by David H. Alexander, John Novembre and Kenneth Lange,
+Genome Research 19 (2009), pp. 1655-1664.
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/prepare_population_structure.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,118 @@
+#!/usr/bin/env python
+
+import gd_util
+import os
+import shutil
+import sys
+from Population import Population
+import gd_composite
+
+################################################################################
+
+def do_import(filename, files_path, min_reads, min_qual, min_spacing, using_info, population_list):
+    info_page = gd_composite.InfoPage()
+    info_page.set_title('Prepare to look for population structure Galaxy Composite Dataset')
+
+    display_file = gd_composite.DisplayFile()
+    display_value = gd_composite.DisplayValue()
+
+    out_ped = gd_composite.Parameter(name='admix.ped', value='admix.ped', display_type=display_file)
+    out_map = gd_composite.Parameter(name='admix.map', value='admix.map', display_type=display_file)
+    out_use = gd_composite.Parameter(description=using_info, display_type=display_value)
+
+    info_page.add_output_parameter(out_ped)
+    info_page.add_output_parameter(out_map)
+    info_page.add_output_parameter(out_use)
+
+    in_min_reads = gd_composite.Parameter(description='Minimum reads covering a SNP, per individual', value=min_reads, display_type=display_value)
+    in_min_qual = gd_composite.Parameter(description='Minimum quality value, per individual', value=min_qual, display_type=display_value)
+    in_min_spacing = gd_composite.Parameter(description='Minimum spacing between SNPs on the same scaffold', value=min_spacing, display_type=display_value)
+
+    info_page.add_input_parameter(in_min_reads)
+    info_page.add_input_parameter(in_min_qual)
+    info_page.add_input_parameter(in_min_spacing)
+
+    misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList())
+    info_page.add_misc(misc_populations)
+
+    with open(filename, 'w') as ofh:
+        print >> ofh, info_page.render()
+
+################################################################################
+
+if len(sys.argv) < 10:
+    gd_util.die('Usage')
+
+# parse command line
+input_snp_filename, input_type, min_reads, min_qual, min_spacing, output_filename, output_files_path, ind_arg = sys.argv[1:9]
+args = sys.argv[9:]
+
+population_files = []
+all_individuals = False
+
+for arg in args:
+    if arg == 'all_individuals':
+        all_individuals = True
+    elif len(arg) > 11 and arg[:11] == 'population:':
+        file, name = arg[11:].split(':', 1)
+        population_files.append((file, name))
+
+p_total = Population()
+p_total.from_wrapped_dict(ind_arg)
+
+individual_population = {}
+population_list = []
+
+if all_individuals:
+    p1 = p_total
+    p1.name = 'All Individuals'
+    population_list.append(p1)
+else:
+    p1 = Population()
+    for file, name in population_files:
+        this_pop = Population(name)
+        this_pop.from_population_file(file)
+        population_list.append(this_pop)
+
+        for tag in this_pop.tag_list():
+            if tag not in individual_population:
+                individual_population[tag] = name
+
+        # add individuals from this file to p1
+        p1.from_population_file(file)
+
+
+if not p_total.is_superset(p1):
+    gd_util.die('There is an individual in the population that is not in the SNP table')
+
+################################################################################
+
+prog = 'admix_prep'
+
+args = [ prog ]
+args.append(input_snp_filename)
+args.append(min_reads)
+args.append(min_qual)
+args.append(min_spacing)
+
+for tag in p1.tag_list():
+    if input_type == 'gd_genotype':
+        column, name = tag.split(':', 1)
+        tag = '{0}:{1}'.format(int(column) - 2, name)
+    args.append(tag)
+
+stdoutdata, stderrdata = gd_util.run_program(prog, args)
+using_info = stdoutdata.rstrip('\r\n')
+
+################################################################################
+
+gd_util.mkdir_p(output_files_path)
+
+output_ped_filename = os.path.join(output_files_path, 'admix.ped')
+output_map_filename = os.path.join(output_files_path, 'admix.map')
+shutil.copy2('admix.ped', output_ped_filename)
+shutil.copy2('admix.map', output_map_filename)
+
+do_import(output_filename, output_files_path, min_reads, min_qual, min_spacing, using_info, population_list)
+sys.exit(0)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/prepare_population_structure.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,164 @@
+<tool id="gd_prepare_population_structure" name="Prepare Input" version="1.2.0">
+  <description>: Filter and convert to the format needed for these tools</description>
+
+  <command interpreter="python">
+    #import json
+    #import base64
+    #import zlib
+    #set $ind_names = $input.dataset.metadata.individual_names
+    #set $ind_colms = $input.dataset.metadata.individual_columns
+    #set $ind_dict = dict(zip($ind_names, $ind_colms))
+    #set $ind_json = json.dumps($ind_dict, separators=(',',':'))
+    #set $ind_comp = zlib.compress($ind_json, 9)
+    #set $ind_arg = base64.b64encode($ind_comp)
+    prepare_population_structure.py '$input'
+    #if $input_type.choice == '0'
+      'gd_snp' '$input_type.min_reads' '$input_type.min_qual'
+    #else if $input_type.choice == '1'
+      'gd_genotype' '0' '0'
+    #end if
+    '0' '$output' '$output.files_path' '$ind_arg'
+    #if $individuals.choice == '0'
+        'all_individuals'
+    #else if $individuals.choice == '1'
+        #for $population in $individuals.populations
+          #set $pop_arg = 'population:%s:%s' % (str($population.p_input), str($population.p_input.name))
+          '$pop_arg'
+        #end for
+    #end if
+  </command>
+
+  <inputs>
+    <conditional name="input_type">
+      <param name="choice" type="select" format="integer" label="Input format">
+        <option value="0" selected="true">gd_snp</option>
+        <option value="1">gd_genotype</option>
+      </param>
+      <when value="0">
+        <param name="input" type="data" format="gd_snp" label="SNP dataset" />
+        <param name="min_reads" type="integer" min="0" value="0" label="Minimum SNP coverage" />
+        <param name="min_qual" type="integer" min="0" value="0" label="Minimum SNP quality" />
+      </when>
+      <when value="1">
+        <param name="input" type="data" format="gd_genotype" label="Genotype dataset" />
+      </when>
+    </conditional>
+
+    <conditional name="individuals">
+      <param name="choice" type="select" label="Individuals">
+        <option value="0" selected="true">All individuals</option>
+        <option value="1">Specified populations</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <repeat name="populations" title="Population" min="1">
+          <param name="p_input" type="data" format="gd_indivs" label="Individuals" />
+        </repeat>
+      </when>
+    </conditional>
+
+    <!--
+    <param name="min_spacing" type="integer" min="0" value="0" label="Minimum spacing between SNPs" />
+    "$min_spacing" "$output" "$output.files_path"
+    -->
+  </inputs>
+
+  <outputs>
+    <data name="output" format="gd_ped">
+      <actions>
+        <action type="metadata" name="base_name" default="admix" />
+      </actions>
+    </data>
+  </outputs>
+
+  <requirements>
+    <requirement type="package" version="0.1">gd_c_tools</requirement>
+  </requirements>
+
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
+      <param name="min_reads" value="3" />
+      <param name="min_qual" value="30" />
+      <param name="min_spacing" value="0" />
+      <param name="choice" value="0" />
+      <output name="output" file="test_out/prepare_population_structure/prepare_population_structure.html" ftype="html" compare="diff" lines_diff="2">
+        <extra_files type="file" name="admix.map" value="test_out/prepare_population_structure/admix.map" />
+        <extra_files type="file" name="admix.ped" value="test_out/prepare_population_structure/admix.ped" />
+      </output>
+    </test>
+  </tests>
+
+  <help>
+
+**Dataset formats**
+
+The input datasets are in gd_snp_, gd_genotype_, and gd_indivs_ formats.
+The output dataset is in gd_ped_ format.  (`Dataset missing?`_)
+
+.. _gd_snp: ./static/formatHelp.html#gd_snp
+.. _gd_genotype: ./static/formatHelp.html#gd_genotype
+.. _gd_indivs: ./static/formatHelp.html#gd_indivs
+.. _gd_ped: ./static/formatHelp.html#gd_ped
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+This tool converts a gd_snp or gd_genotype dataset into the format needed for estimating
+the population structure.  You can select the individuals to be included,
+by using "population" datasets created via the Specify Individuals tool.
+(It is important for these population datasets to have distinguishable names,
+since they will be stored in the output's metadata so that subsequent tools
+can use them as labels.  If necessary, rename the datasets to give them
+distinct and meaningful names before running this tool.)
+
+You can also filter the SNPs, based on criteria such as minimum coverage
+(a qualifying SNP must have at least this many reads for every included
+individual), minimum quality score (for every included individual), and/or
+minimum spacing (SNPs that are too close together on the same chromosome or
+scaffold are discarded).  In addition to producing the filtered and formatted
+.map and .ped files for subsequent analysis, the tool reports the number of
+SNPs meeting these conditions, which can be seen by clicking on the eye icon
+in the history panel after the program runs.
+
+-----
+
+**Example**
+
+- input::
+
+    Contig161_chr1_4641264_4641879   115  C  T  73.5   chr1   4641382  C   6  0  2  45   8  0  2  51   15  0  2  72   5  0  2  42   6  0  2  45  10  0  2  57   Y  54  0.323  0
+    Contig48_chr1_10150253_10151311   11  A  G  94.3   chr1  10150264  A   1  0  2  30   1  0  2  30    1  0  2  30   3  0  2  36   1  0  2  30   1  0  2  30   Y  22  +99.   0
+    Contig20_chr1_21313469_21313570   66  C  T  54.0   chr1  21313534  C   4  0  2  39   4  0  2  39    5  0  2  42   4  0  2  39   4  0  2  39   5  0  2  42   N   1  +99.   0
+    etc.
+
+- output cover page::
+
+    Prepare to look for population structure Galaxy Composite Dataset
+    Output completed: 2012-10-01 04:09:36 PM
+
+    Outputs
+        * admix.ped (link)
+        * admix.map (link)
+        * Using 222 of 400 SNPs
+
+    Inputs
+        * Minimum reads covering a SNP, per individual: 6
+        * Minimum quality value, per individual: 0
+        * Minimum spacing between SNPs on the same scaffold: 0
+
+    Populations
+        * Pop. A
+             1. PB1
+             2. PB2
+        * Pop. B
+             1. PB3
+             2. PB4
+        * Pop. C
+             1. PB6
+             2. PB8
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/rank_pathways.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,168 @@
+<tool id="gd_calc_freq" name="Rank Pathways" version="1.2.0">
+  <description>: Assess the impact of a gene set on KEGG pathways</description>
+
+  <command interpreter="python">
+    #if $rank_by.choice == 'pct'
+      rank_pathways_pct.py
+      --input '$rank_by.input1'
+      --columnENSEMBLT '$rank_by.t_col1'
+      --inBckgrndfile '$rank_by.input2'
+      --columnENSEMBLTBckgrnd '$rank_by.t_col2'
+      --columnKEGGBckgrnd '$rank_by.k_col2'
+      --statsTest '$rank_by.stat'
+      --output '$output'
+    #else if $rank_by.choice == 'paths'
+      calclenchange.py
+      '--loc_file=${GALAXY_DATA_INDEX_DIR}/gd.rank.loc'
+      '--species=${rank_by.input.metadata.dbkey}'
+      '--input=${rank_by.input}'
+      '--output=${output}'
+      '--posKEGGclmn=${rank_by.kpath}'
+      '--KEGGgeneposcolmn=${rank_by.kgene}'
+    #end if
+  </command>
+
+  <inputs>
+    <conditional name="rank_by">
+      <param name="choice" type="select" label="Rank by">
+        <option value="pct" selected="true">percentage of genes affected</option>
+        <option value="paths">change in length and number of paths</option>
+      </param>
+      <when value="pct">
+        <!-- using fields similar to the Rank Terms tool -->
+        <param name="input1" type="data" format="tabular" label="Query dataset" />
+        <param name="t_col1" type="data_column" data_ref="input1" label="Column with ENSEMBL transcript codes" />
+        <param name="input2" type="data" format="tabular" label="Background dataset" />
+        <param name="t_col2" type="data_column" data_ref="input2" label="Column with ENSEMBL transcript codes" />
+        <param name="k_col2" type="data_column" data_ref="input2" label="Column with KEGG pathways" />
+        <param name="stat" type="select" label="Statistic for determining enrichment/depletion">
+          <option value="fisher" selected="true">two-tailed Fisher's exact test</option>
+          <option value="hypergeometric">hypergeometric test</option>
+          <option value="binomial">binomial probability</option>
+        </param>
+      </when>
+      <when value="paths">
+        <param name="input" type="data" format="tabular" label="Dataset" />
+        <param name="kgene" type="data_column" data_ref="input" label="Column with KEGG gene ID" />
+        <param name="kpath" type="data_column" data_ref="input" numerical="false" label="Column with KEGG pathways" />
+      </when>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <data name="output" format="tabular" />
+  </outputs>
+
+  <requirements>
+    <requirement type="package" version="0.2.5">mechanize</requirement>
+    <requirement type="package" version="1.8.1">networkx</requirement>
+    <requirement type="package" version="0.1.4">fisher</requirement>
+  </requirements>
+
+
+  <tests>
+    <test>
+    </test>
+  </tests>
+
+  <help>
+
+**Dataset formats**
+
+The query dataset has a column containing ENSEMBL transcript codes for
+the gene set of interest, while the background dataset has one column
+with ENSEMBL transcript codes and another with KEGG pathways, for some larger
+universe of genes.
+
+All of the input and output datasets are in tabular_ format.  The input
+dataset (i.e. query) to rank by "percentage of genes affected" has a
+column containing ENSEMBL transcript codes for the gene set of interest,
+while the background dataset has one column with ENSEMBL transcript
+codes and another with KEGG pathways, for some larger universe of genes.
+The input dataset to rank by "change in length and number of paths"
+must have columns with KEGG gene ID and pathways.  The output datasets
+are described below.  (`Dataset missing?`_)
+
+.. _tabular: ./static/formatHelp.html#tab
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+Given a query set of genes from a larger background dataset, this tool
+evaluates the over- or under-representation of KEGG pathways in the query
+set, using the specified statistical test.  Alternatively, the tool ranks
+the pathways based on the change in length and number of paths connecting
+sources and sinks.  This change is calculated between graphs representing
+pathways with and without excluding the nodes that represent the genes
+in an input list.  Sources are all the nodes representing the initial
+reactants/products in the pathway.  Sinks are all the nodes representing
+the final reactants/products in the pathway.
+
+If pathways are ranked by percentage of genes affected, the output
+contains a row for each KEGG pathway, with the following columns:
+
+1. count: the number of genes in the query set that are in this pathway
+2. representation: the percentage of this pathway's genes (from the background dataset) that appear in the query set
+3. ranking of this pathway, based on its representation ("1" is highest)
+4. probability of depletion of this pathway in the query dataset
+5. probability of enrichment of this pathway in the query dataset
+6. name of the pathway
+
+If pathways are ranked by change in length and number of paths, the
+output is a tabular dataset with the following columns:
+
+1. change in the mean length of paths between sources and sinks
+2. mean length of paths between sources and sinks in the pathway including the genes in the input dataset. If the pathway does not have sources/sinks, the length is assumed to be infinite (I)
+3. mean length of paths between sources and sinks in the pathway excluding the genes in the input dataset. If the pathway does not have sources/sinks, the length is assumed to be infinite (I)
+4. rank of the change in the mean length of paths between sources and sinks (from high change to low change)
+5. change in the number of paths between sources and sinks
+6. number of paths between sources and sinks in the pathway including the genes in the input dataset. If the pathway does not have sources/sinks, it is assumed to be a circuit (C)
+7. number of paths between sources and sinks in the pathway excluding the genes in the input dataset. If the pathway does not have sources/sinks, it is assumed to be a circuit (C)
+8. rank of the change in the number of paths between sources and sinks (from high change to low change)
+9. name of the pathway
+
+-----
+
+**Examples**
+
+Rank by percentage of genes affected:
+
+- input background dataset (column 5 for ENSEMBL transcript, column 12 for KEGG pathways, two-tailed Fisher's exact test for statistic)::
+
+   Contig39_chr1_3261104_3261850   414  chr1  3261546  ENSCAFT00000000001   ENSCAFP00000000001   S    667   F    476153  probably damaging    cfa00230=Purine metabolism.cfa00500=Starch and sucrose metabolism.cfa00740=Riboflavin metabolism.cfa00760=Nicotinate and nicotinamide metabolism.cfa00770=Pantothenate and CoA biosynthesis.cfa01100=Metabolic pathways
+   Contig62_chr1_19011969_19012646 265  chr1  19012240 ENSCAFT00000000144   ENSCAFP00000000125   *    161   R    483960  probably damaging    N
+   etc.
+
+- input query dataset (column 5 for ENSEMBL transcript)::
+
+   Contig12_chr20_101969_112646    265  chr20 9822141  ENSCAFT00000001234   ENSCAFP00000021123   T    101   R    476153  probably damaging
+   Contig39_chr1_3261104_3261850   414  chr1  3261546  ENSCAFT00000000001   ENSCAFP00000000001   S    667   F    476153  probably damaging
+   etc.
+
+- output::
+
+   3   0.20    1   1.0 0.0065  cfa03450=Non-homologous end-joining
+   1   0.067   2   1.0 0.019   cfa00750=Vitamin B6 metabolism
+   2   0.062   3   1.0 0.021   cfa00290=Valine, leucine and isoleucine biosynthesis
+   1   0.037   4   1.0 0.035   cfa00770=Pantothenate and CoA biosynthesis
+   etc.
+
+Rank by change in length and number of paths:
+
+- input (column 10 for KEGG gene ID, column 12 for KEGG pathways)::
+
+   Contig39_chr1_3261104_3261850   414  chr1  3261546  ENSCAFT00000000001   ENSCAFP00000000001   S    667   F    476153  probably damaging    cfa00230=Purine metabolism.cfa00500=Starch and sucrose metabolism.cfa00740=Riboflavin metabolism.cfa00760=Nicotinate and nicotinamide metabolism.cfa00770=Pantothenate and CoA biosynthesis.cfa01100=Metabolic pathways
+   Contig62_chr1_19011969_19012646 265  chr1  19012240 ENSCAFT00000000144   ENSCAFP00000000125   *    161   R    483960  probably damaging    N
+   etc.
+
+- output::
+
+   3.64   8.44   4.8     2   4    9    5   1   cfa00260=Glycine, serine and threonine metabolism
+   7.6    9.6    2       1   3    5    2   2   cfa00240=Pyrimidine metabolism
+   0.05   2.67   2.62    6   1   30   29   3   cfa00982=Drug metabolism - cytochrome P450
+   -0.08  8.33   8.41   84   1   30   29   3   cfa00564=Glycerophospholipid metabolism
+   etc.
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/rank_pathways_pct.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,206 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+#       KEGGFisher.py
+#
+#       Copyright 2013 Oscar Reina <oscar@niska.bx.psu.edu>
+#
+#       This program is free software; you can redistribute it and/or modify
+#       it under the terms of the GNU General Public License as published by
+#       the Free Software Foundation; either version 2 of the License, or
+#       (at your option) any later version.
+#
+#       This program is distributed in the hope that it will be useful,
+#       but WITHOUT ANY WARRANTY; without even the implied warranty of
+#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#       GNU General Public License for more details.
+#
+#       You should have received a copy of the GNU General Public License
+#       along with this program; if not, write to the Free Software
+#       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+#       MA 02110-1301, USA.
+
+import argparse
+import os
+import sys
+from fisher import pvalue as fisher
+from decimal import Decimal,getcontext
+from math import lgamma,exp,factorial
+
+def binProb(SAPs_KEGG,NoSAPs_KEGG,SAPs_all,NoSAPs_all,CntKEGG_All,totalSAPs,pKEGG):
+	"""
+	Binomial tail probabilities for CntKEGG_All trials with success
+	probability pKEGG: returns (P[k<=SAPs_KEGG], P[k>=SAPs_KEGG]).
+	The other arguments are accepted but not used.
+	"""
+	def nChooseK(n,k):
+		# binomial coefficient, returned as a Decimal
+		return factorial(n) / Decimal(str(factorial(k)*factorial(n-k)))
+	def mass(k):
+		# probability of exactly k successes
+		coef=Decimal(str(nChooseK(CntKEGG_All,k)))
+		hit=Decimal(str(pKEGG**k))
+		miss=Decimal(str(1.0-pKEGG))**Decimal(str(CntKEGG_All-k))
+		return coef*hit*miss
+	#~ lower tail, both ends inclusive
+	probLow = sum(mass(k) for k in range(0, SAPs_KEGG+1))
+	#~ upper tail, both ends inclusive
+	probHigh = sum(mass(k) for k in range(int(SAPs_KEGG),CntKEGG_All+1))
+	return probLow,probHigh
+
+def gauss_hypergeom(X, CntKEGG_All, SAPs_all, totalSAPs):
+	"""
+	Returns the probability of drawing X successes of SAPs_all marked items
+	in CntKEGG_All draws from a bin of totalSAPs total items.  Returns 0 when
+	CntKEGG_All-X unmarked items cannot be drawn from totalSAPs-SAPs_all.
+	"""
+	def logchoose(ni, ki):
+		# log of the binomial coefficient C(ni, ki); for integer arguments
+		# with ki > ni, lgamma() is evaluated at a non-positive integer and
+		# raises ValueError, which is left to propagate to the caller
+		lgn1 = lgamma(ni+1)
+		lgk1 = lgamma(ki+1)
+		lgnk1 = lgamma(ni-ki+1)
+		return lgn1 - (lgnk1 + lgk1)
+	#~ combine in log space and exponentiate only at the end
+	r1 = logchoose(SAPs_all, X)
+	try:
+		r2 = logchoose(totalSAPs-SAPs_all, CntKEGG_All-X)
+	except ValueError:
+		return 0
+	r3 = logchoose(totalSAPs,CntKEGG_All)
+	return exp(r1 + r2 - r3)
+
+def hypergeo_sf(SAPs_KEGG,NoSAPs_KEGG,SAPs_all,NoSAPs_all,CntKEGG_All,totalSAPs,pKEGG):
+	"""
+	Hypergeometric tails: returns (lower, upper), the summed probabilities of
+	at most and of at least SAPs_KEGG hits, each clamped to the range [0, 1].
+	"""
+	upper = lower = 0
+	for hits in range(SAPs_KEGG,min(SAPs_all,CntKEGG_All)+1):
+		upper += max(gauss_hypergeom(hits,CntKEGG_All,SAPs_all,totalSAPs), 0.0)
+	for hits in range(0, SAPs_KEGG+1):
+		lower += max(gauss_hypergeom(hits,CntKEGG_All,SAPs_all,totalSAPs), 0.0)
+	return min(max(lower,0.0), 1),min(max(upper,0.0), 1)
+
+def fisherexct(SAPs_KEGG,NoSAPs_KEGG,SAPs_all,NoSAPs_all,CntKEGG_All,totalSAPs,pKEGG):
+	"""
+	Runs Fisher's exact test on the first four counts and returns the
+	(left_tail, right_tail) p-values of the result.
+	"""
+	tails=fisher(SAPs_KEGG,NoSAPs_KEGG,SAPs_all,NoSAPs_all)
+	return tails.left_tail,tails.right_tail
+
+def rtrnKEGGcENSEMBLc(inBckgrndfile,columnENSEMBLTBckgrnd,columnKEGGBckgrnd):
+	"""
+	Reads the tab-delimited background file; returns (dict mapping each KEGG
+	pathway to the set of ENSEMBL transcripts listed with it, set of all those
+	transcripts).  Columns are 0-based; entries '', 'U' and 'N' are ignored.
+	"""
+	dKEGGTENSEMBLT={}
+	with open(inBckgrndfile,'r') as bckgrnd:
+		for eachl in bckgrnd:
+			if eachl.strip():
+				flds=eachl.splitlines()[0].split('\t')
+				ENSEMBLT=flds[columnENSEMBLTBckgrnd]
+				for KEGGT in set(flds[columnKEGGBckgrnd].split('.')).difference(set(['','U','N'])):
+					dKEGGTENSEMBLT.setdefault(KEGGT,set()).add(ENSEMBLT)
+	# set().union(*[]) is an empty set; set.union(*[]) would raise TypeError
+	return dKEGGTENSEMBLT,set().union(*dKEGGTENSEMBLT.values())
+
+def rtrnENSEMBLcSAPs(inSAPsfile,columnENSEMBLT,ENSEMBLTGinKEGG):
+	"""
+	returns a set of the ENSEMBLT codes present in the input list and
+	in the KEGG file; columnENSEMBLT is 0-based and blank lines are skipped
+	"""
+	sENSEMBLTSAPsinKEGG=set()
+	with open(inSAPsfile,'r') as sapsfile:
+		for eachl in sapsfile:
+			if eachl.strip():  # a blank line has no columns to index
+				sENSEMBLTSAPsinKEGG.add(eachl.splitlines()[0].split('\t')[columnENSEMBLT])
+	return sENSEMBLTSAPsinKEGG.intersection(ENSEMBLTGinKEGG)
+
+def rtrnCounts(dKEGGTENSEMBLT,ENSEMBLTGinKEGG,sENSEMBLTSAPsinKEGG,statsTest):
+	"""
+	Returns one tab-separated line per KEGG pathway, highest representation
+	first.  Fields: # genes in the input list and in the pathway, fraction of
+	the pathway's genes that are in the input list, rank of that fraction,
+	the two tail probabilities from the selected test, the pathway.  statsTest
+	must be 'fisher', 'hypergeometric' or 'binomial', else ValueError is raised.
+	"""
+	if statsTest=='fisher':
+		ptest=fisherexct
+	elif statsTest=='hypergeometric':
+		ptest=hypergeo_sf
+	elif statsTest=='binomial':
+		ptest=binProb
+	else:
+		# previously fell through and failed later on the unbound ptest
+		raise ValueError('unknown statistical test: {0}'.format(statsTest))
+	totalSAPs=len(ENSEMBLTGinKEGG)
+	SAPs_all=len(sENSEMBLTSAPsinKEGG)
+	NoSAPs_all=totalSAPs-SAPs_all
+	# with an empty background there is nothing to rank (and nothing to divide by)
+	pKEGG=SAPs_all/float(totalSAPs) if totalSAPs else 0.0
+	ltfreqs=[]
+	for echKEGGT in dKEGGTENSEMBLT:
+		CntKEGG_All=len(dKEGGTENSEMBLT[echKEGGT])
+		SAPs_KEGG=len(dKEGGTENSEMBLT[echKEGGT].intersection(sENSEMBLTSAPsinKEGG))
+		NoSAPs_KEGG=CntKEGG_All-SAPs_KEGG
+		probLow,probHigh=ptest(SAPs_KEGG,NoSAPs_KEGG,SAPs_all,NoSAPs_all,CntKEGG_All,totalSAPs,pKEGG)
+		ltfreqs.append([(SAPs_KEGG/Decimal(CntKEGG_All)),SAPs_KEGG,probLow,probHigh,echKEGGT])
+	ltfreqs.sort(reverse=True)
+	outl=[]
+	cper,crank=Decimal('2'),0
+	#~ 2 significant digits for output only; the previous precision is restored after
+	ctx=getcontext()
+	prec=ctx.prec
+	ctx.prec=2
+	try:
+		for perc,cnt_go,pvalLow,pvalHigh,goTerm in ltfreqs:
+			if perc<cper:
+				crank+=1
+				cper=perc
+			outl.append('\t'.join([str(cnt_go),str(Decimal(perc)*Decimal('1.0')),str(crank),str(Decimal(pvalLow)*Decimal('1.0')),str(Decimal(pvalHigh)*Decimal('1.0')),goTerm]))
+	finally:
+		ctx.prec=prec
+	return outl
+
+
+def main():
+	"""
+	Command-line entry point: parse the options, rank the KEGG pathways of
+	the background file against the input gene list and write the table
+	to the output file.  Returns 0.
+	"""
+	parser = argparse.ArgumentParser(description='Returns the count of genes in KEGG categories and their statistical overrrepresentation, from a list of genes and an background file (i.e. plane text with ENSEMBLT and KEGG pathways).')
+	parser.add_argument('--input',metavar='input TXT file',type=str,help='the input file with the table in txt format.',required=True)
+	parser.add_argument('--inBckgrndfile',metavar='input TXT file',type=str,help='the input file with the background table in txt format.',required=True)
+	parser.add_argument('--output',metavar='output TXT file',type=str,help='the output file with the table in txt format.',required=True)
+	parser.add_argument('--columnENSEMBLT',metavar='column number',type=int,help='column with the ENSEMBL transcript code in the input file.',required=True)
+	parser.add_argument('--columnENSEMBLTBckgrnd',metavar='column number',type=int,help='column with the ENSEMBL transcript code in the background file.',required=True)
+	parser.add_argument('--columnKEGGBckgrnd',metavar='column number',type=int,help='column with the KEGG pathways in the background file.',required=True)
+	parser.add_argument('--statsTest',metavar='input TXT file',type=str,help='statistical test to compare KEGG pathways (i.e. fisher, hypergeometric, binomial).',required=True)
+
+	args = parser.parse_args()
+
+	#~ column options are 1-based on the command line; convert them to
+	#~ 0-based list indexes
+	columnENSEMBLT = args.columnENSEMBLT-1
+	columnENSEMBLTBckgrnd = args.columnENSEMBLTBckgrnd-1
+	# was "=-1", which discarded the option and always read the last column
+	columnKEGGBckgrnd = args.columnKEGGBckgrnd-1
+	#~
+	dKEGGTENSEMBLT,ENSEMBLTGinKEGG=rtrnKEGGcENSEMBLc(args.inBckgrndfile,columnENSEMBLTBckgrnd,columnKEGGBckgrnd)
+	sENSEMBLTSAPsinKEGG=rtrnENSEMBLcSAPs(args.input,columnENSEMBLT,ENSEMBLTGinKEGG)
+	outl=rtrnCounts(dKEGGTENSEMBLT,ENSEMBLTGinKEGG,sENSEMBLTSAPsinKEGG,args.statsTest)
+	#~
+	# the output has no trailing newline after the last row
+	with open(args.output,'w') as saleKEGGPCount:
+		saleKEGGPCount.write('\n'.join(outl))
+	#~
+	return 0
+
+if __name__ == '__main__':
+	main()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/rank_terms.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,204 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+#       GOFisher.py
+#
+#       Copyright 2013 Oscar Reina <oscar@niska.bx.psu.edu>
+#
+#       This program is free software; you can redistribute it and/or modify
+#       it under the terms of the GNU General Public License as published by
+#       the Free Software Foundation; either version 2 of the License, or
+#       (at your option) any later version.
+#
+#       This program is distributed in the hope that it will be useful,
+#       but WITHOUT ANY WARRANTY; without even the implied warranty of
+#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#       GNU General Public License for more details.
+#
+#       You should have received a copy of the GNU General Public License
+#       along with this program; if not, write to the Free Software
+#       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+#       MA 02110-1301, USA.
+
+import argparse
+import os
+import sys
+from fisher import pvalue as fisher
+from decimal import Decimal,getcontext
+from math import lgamma,exp,factorial
+
+def binProb(SAPs_GO,NoSAPs_GO,SAPs_all,NoSAPs_all,CntGO_All,totalSAPs,pGO):
+	"""
+	Binomial tail probabilities for CntGO_All trials with success
+	probability pGO: returns (P[k<=SAPs_GO], P[k>=SAPs_GO]).
+	The other arguments are accepted but not used.
+	"""
+	def nChooseK(n,k):
+		# binomial coefficient, returned as a Decimal
+		return factorial(n) / Decimal(str(factorial(k)*factorial(n-k)))
+	def mass(k):
+		# probability of exactly k successes
+		coef=Decimal(str(nChooseK(CntGO_All,k)))
+		hit=Decimal(str(pGO**k))
+		miss=Decimal(str(1.0-pGO))**Decimal(str(CntGO_All-k))
+		return coef*hit*miss
+	#~ lower tail, both ends inclusive
+	probLow = sum(mass(k) for k in range(0, SAPs_GO+1))
+	#~ upper tail, both ends inclusive
+	probHigh = sum(mass(k) for k in range(int(SAPs_GO),CntGO_All+1))
+	return probLow,probHigh
+
+def gauss_hypergeom(X, CntGO_All, SAPs_all, totalSAPs):
+	"""
+	Returns the probability of drawing X successes of SAPs_all marked items
+	in CntGO_All draws from a bin of totalSAPs total items.  Returns 0 when
+	CntGO_All-X unmarked items cannot be drawn from totalSAPs-SAPs_all.
+	"""
+	def logchoose(ni, ki):
+		# log of the binomial coefficient C(ni, ki); for integer arguments
+		# with ki > ni, lgamma() is evaluated at a non-positive integer and
+		# raises ValueError, which is left to propagate to the caller
+		lgn1 = lgamma(ni+1)
+		lgk1 = lgamma(ki+1)
+		lgnk1 = lgamma(ni-ki+1)
+		return lgn1 - (lgnk1 + lgk1)
+	#~ combine in log space and exponentiate only at the end
+	r1 = logchoose(SAPs_all, X)
+	try:
+		r2 = logchoose(totalSAPs-SAPs_all, CntGO_All-X)
+	except ValueError:
+		return 0
+	r3 = logchoose(totalSAPs,CntGO_All)
+	return exp(r1 + r2 - r3)
+
+def hypergeo_sf(SAPs_GO,NoSAPs_GO,SAPs_all,NoSAPs_all,CntGO_All,totalSAPs,pGO):
+	"""
+	Hypergeometric tails: returns (lower, upper), the summed probabilities of
+	at most and of at least SAPs_GO hits, each clamped to the range [0, 1].
+	"""
+	upper = lower = 0
+	for hits in range(SAPs_GO,min(SAPs_all,CntGO_All)+1):
+		upper += max(gauss_hypergeom(hits,CntGO_All,SAPs_all,totalSAPs), 0.0)
+	for hits in range(0, SAPs_GO+1):
+		lower += max(gauss_hypergeom(hits,CntGO_All,SAPs_all,totalSAPs), 0.0)
+	return min(max(lower,0.0), 1),min(max(upper,0.0), 1)
+
+def fisherexct(SAPs_GO,NoSAPs_GO,SAPs_all,NoSAPs_all,CntGO_All,totalSAPs,pGO):
+	"""
+	Runs Fisher's exact test on the first four counts and returns the
+	(left_tail, right_tail) p-values of the result.
+	"""
+	tails=fisher(SAPs_GO,NoSAPs_GO,SAPs_all,NoSAPs_all)
+	return tails.left_tail,tails.right_tail
+
+def rtrnGOcENSEMBLc(inExtnddfile,columnENSEMBLTExtndd,columnGOExtndd):
+	"""
+	Reads the tab-delimited extended file; returns (dict mapping each GO
+	term to the set of ENSEMBL transcripts listed with it, set of all those
+	transcripts).  Columns are 0-based; entries '', 'U' and 'N' are ignored.
+	"""
+	dGOTENSEMBLT={}
+	with open(inExtnddfile,'r') as extndd:
+		for eachl in extndd:
+			if eachl.strip():
+				flds=eachl.splitlines()[0].split('\t')
+				ENSEMBLT=flds[columnENSEMBLTExtndd]
+				for GOT in set(flds[columnGOExtndd].split('.')).difference(set(['','U','N'])):
+					dGOTENSEMBLT.setdefault(GOT,set()).add(ENSEMBLT)
+	# set().union(*[]) is an empty set; set.union(*[]) would raise TypeError
+	return dGOTENSEMBLT,set().union(*dGOTENSEMBLT.values())
+
+def rtrnENSEMBLcSAPs(inSAPsfile,columnENSEMBLT,ENSEMBLTGinGO):
+	"""
+	returns a set of the ENSEMBLT codes present in the input list and
+	in the GO file; columnENSEMBLT is 0-based and blank lines are skipped
+	"""
+	sENSEMBLTSAPsinGO=set()
+	with open(inSAPsfile,'r') as sapsfile:
+		for eachl in sapsfile:
+			if eachl.strip():  # a blank line has no columns to index
+				sENSEMBLTSAPsinGO.add(eachl.splitlines()[0].split('\t')[columnENSEMBLT])
+	return sENSEMBLTSAPsinGO.intersection(ENSEMBLTGinGO)
+
+def rtrnCounts(dGOTENSEMBLT,ENSEMBLTGinGO,sENSEMBLTSAPsinGO,statsTest):
+	"""
+	Returns one tab-separated line per GO term, highest representation
+	first.  Fields: # genes in the input list and in the term, fraction of
+	the term's genes that are in the input list, rank of that fraction,
+	the two tail probabilities from the selected test, the term.  statsTest
+	must be 'fisher', 'hypergeometric' or 'binomial', else ValueError is raised.
+	"""
+	if statsTest=='fisher':
+		ptest=fisherexct
+	elif statsTest=='hypergeometric':
+		ptest=hypergeo_sf
+	elif statsTest=='binomial':
+		ptest=binProb
+	else:
+		# previously fell through and failed later on the unbound ptest
+		raise ValueError('unknown statistical test: {0}'.format(statsTest))
+	totalSAPs=len(ENSEMBLTGinGO)
+	SAPs_all=len(sENSEMBLTSAPsinGO)
+	NoSAPs_all=totalSAPs-SAPs_all
+	# with an empty background there is nothing to rank (and nothing to divide by)
+	pGO=SAPs_all/float(totalSAPs) if totalSAPs else 0.0
+	ltfreqs=[]
+	for echGOT in dGOTENSEMBLT:
+		CntGO_All=len(dGOTENSEMBLT[echGOT])
+		SAPs_GO=len(dGOTENSEMBLT[echGOT].intersection(sENSEMBLTSAPsinGO))
+		NoSAPs_GO=CntGO_All-SAPs_GO
+		probLow,probHigh=ptest(SAPs_GO,NoSAPs_GO,SAPs_all,NoSAPs_all,CntGO_All,totalSAPs,pGO)
+		ltfreqs.append([(SAPs_GO/Decimal(CntGO_All)),SAPs_GO,probLow,probHigh,echGOT])
+	ltfreqs.sort(reverse=True)
+	outl=[]
+	cper,crank=Decimal('2'),0
+	#~ 2 significant digits for output only; the previous precision is restored after
+	ctx=getcontext()
+	prec=ctx.prec
+	ctx.prec=2
+	try:
+		for perc,cnt_go,pvalLow,pvalHigh,goTerm in ltfreqs:
+			if perc<cper:
+				crank+=1
+				cper=perc
+			outl.append('\t'.join([str(cnt_go),str(Decimal(perc)*Decimal('1.0')),str(crank),str(Decimal(pvalLow)*Decimal('1.0')),str(Decimal(pvalHigh)*Decimal('1.0')),goTerm]))
+	finally:
+		ctx.prec=prec
+	return outl
+
+
+def main():
+	"""
+	Command-line entry point: parse the options, rank the GO terms of the
+	extended file against the input gene list and write the table to the
+	output file.  Column options are used as given, as 0-based indexes.
+	Returns 0.
+	"""
+	parser = argparse.ArgumentParser(description='Returns the count of genes in GO categories and their statistical overrrepresentation, from a list of genes and an extended file (i.e. plane text with ENSEMBLT and GO terms).')
+	parser.add_argument('--input',metavar='input TXT file',type=str,help='the input file with the table in txt format.',required=True)
+	parser.add_argument('--inExtnddfile',metavar='input TXT file',type=str,help='the input file with the extended table in txt format.',required=True)
+	parser.add_argument('--output',metavar='output TXT file',type=str,help='the output file with the table in txt format.',required=True)
+	parser.add_argument('--columnENSEMBLT',metavar='column number',type=int,help='column with the ENSEMBL transcript code in the input file.',required=True)
+	parser.add_argument('--columnENSEMBLTExtndd',metavar='column number',type=int,help='column with the ENSEMBL transcript code in the extended file.',required=True)
+	parser.add_argument('--columnGOExtndd',metavar='column number',type=int,help='column with the GO terms in the extended file.',required=True)
+	parser.add_argument('--statsTest',metavar='input TXT file',type=str,help='statistical test to compare GO terms (i.e. fisher, hypergeometric, binomial).',required=True)
+
+	opts = parser.parse_args()
+
+	#~ build the term -> transcripts map, keep the input transcripts that
+	#~ appear in it, then compute one ranked output row per term
+	dGOTENSEMBLT,ENSEMBLTGinGO=rtrnGOcENSEMBLc(opts.inExtnddfile,opts.columnENSEMBLTExtndd,opts.columnGOExtndd)
+	sENSEMBLTSAPsinGO=rtrnENSEMBLcSAPs(opts.input,opts.columnENSEMBLT,ENSEMBLTGinGO)
+	outl=rtrnCounts(dGOTENSEMBLT,ENSEMBLTGinGO,sENSEMBLTSAPsinGO,opts.statsTest)
+
+	#~ rows are joined with newlines; none is written after the last row
+	outfile=open(opts.output,'w')
+	outfile.write('\n'.join(outl))
+	outfile.close()
+	#~
+	return 0
+
+
+if __name__ == '__main__':
+	main()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/rank_terms.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,65 @@
+<tool id="gd_rank_terms" name="Rank Terms" version="1.1.0">
+  <description>: Assess the enrichment/depletion of a gene set for GO terms</description>
+
+  <command interpreter="python">
+    #set $t_col1_0 = int(str($t_col1)) - 1
+    #set $t_col2_0 = int(str($t_col2)) - 1
+    #set $g_col2_0 = int(str($g_col2)) - 1
+    rank_terms.py --input "$input1" --columnENSEMBLT $t_col1_0 --inExtnddfile "$input2" --columnENSEMBLTExtndd $t_col2_0 --columnGOExtndd $g_col2_0 --statsTest "$stat" --output "$output"
+  </command>
+
+  <inputs>
+    <param name="input1" type="data" format="tabular" label="Query dataset" />
+    <param name="t_col1" type="data_column" data_ref="input1" label="Column with ENSEMBL transcript codes" />
+    <param name="input2" type="data" format="tabular" label="Background dataset" />
+    <param name="t_col2" type="data_column" data_ref="input2" label="Column with ENSEMBL transcript codes" />
+    <param name="g_col2" type="data_column" data_ref="input2" label="Column with GO terms" />
+    <param name="stat" type="select" label="Statistic for determining enrichment/depletion">
+      <option value="fisher" selected="true">two-tailed Fisher's exact test</option>
+      <option value="hypergeometric">hypergeometric test</option>
+      <option value="binomial">binomial probability</option>
+    </param>
+  </inputs>
+
+  <outputs>
+    <data name="output" format="tabular" />
+  </outputs>
+
+  <requirements>
+    <requirement type="package" version="0.1.4">fisher</requirement>
+  </requirements>
+
+  <help>
+
+**Dataset formats**
+
+All of the input and output datasets are in tabular_ format.
+The query dataset has a column containing ENSEMBL transcript codes for
+the gene set of interest, while the background dataset has one column
+with ENSEMBL transcript codes and another with GO terms, for some
+larger universe of genes.
+The output dataset is described below.
+(`Dataset missing?`_)
+
+.. _tabular: ./static/formatHelp.html#tab
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+Given a query set of genes from a larger background dataset, this tool
+evaluates the over- or under-representation of Gene Ontology terms in the
+query set, using the specified statistical test.
+
+The output contains a row for each GO term, with the following columns:
+
+1. count: the number of genes in the query set that are in this GO category
+2. representation: the percentage of this category's genes (from the background dataset) that appear in the query set
+3. ranking of this term, based on its representation ("1" is highest)
+4. probability of depletion of this GO category in the query dataset
+5. probability of enrichment of this GO category in the query dataset
+6. GO term
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/raxml.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+
+import random
+import sys
+import shutil
+import gd_util
+
+################################################################################
+
+if len(sys.argv) != 3:
+    gd_util.die('Usage')
+
+input, output = sys.argv[1:]
+random.seed()
+
+################################################################################
+
+prog = 'raxmlHPC'
+
+args = [ prog ]
+
+## required: -s sequenceFileName -n outputFileName -m substitutionModel
+## we supply -s, -n (they are not allowed from user)
+
+args.append('-s')           # name of the alignment data file in PHYLIP format
+args.append(input)
+
+args.append('-n')           # name of the output file
+args.append('fake')
+
+## default options
+args.append('-m')           # substitutionModel
+args.append('GTRGAMMA')     # GTR + Optimization of substitution rates + GAMMA model of rate
+                            # heterogeneity (alpha parameter will be estimated)
+
+args.append('-N')           # number of alternative runs on distinct starting trees
+args.append(1000)
+
+args.append('-f')           # select algorithm
+args.append('a')            # rapid Bootstrap analysis and search for
+                            # best-scoring ML tree in one program run
+
+args.append('-x')           # integer random seed and turn on rapid bootstrapping
+args.append(random.randint(0,100000000000000))
+
+args.append('-p')           # random seed for parsimony inferences
+args.append(random.randint(0,100000000000000))
+
+gd_util.run_program(prog, args)
+shutil.copy2('RAxML_bipartitions.fake', output)
+sys.exit(0)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/raxml.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,37 @@
+<tool id="gd_raxml" name="RAxML" version="1.0.0">
+  <description>: construct a maximum-likelihood phylogenetic tree</description>
+
+  <command interpreter="python">
+    raxml.py '$input' '$output'
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="txt" label="PHYLIP dataset" />
+  </inputs>
+
+  <outputs>
+    <data name="output" format="nhx" />
+  </outputs>
+
+
+  <requirements>
+    <requirement type="package" version="7.7.6">raxml</requirement>
+  </requirements>
+
+  <help>
+**What it does**
+
+This tool runs RAxML on a phylip formatted file and returns a maximum
+likelihood phylogram supported by a desired number of bootstraps.
+
+This program takes as input a phylip formatted file and optionally a
+number of parameters (for further information consult the manual_),
+and returns a Newick formatted tree that can be explored with Phyloviz.
+
+By default the program runs 1,000 fast bootstraps on the best likelihood
+tree constructed with the GTR + gamma model.
+
+.. _manual: http://sco.h-its.org/exelixis/oldPage/RAxML-Manual.7.0.4.pdf
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/reorder.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+
+import sys
+
+def parse_rangelist(string):
+    rv = []
+
+    tokens = strip_split(string, ',')
+    for token in tokens:
+        int_list = parse_token(token)
+        for int_val in int_list:
+            int_val -= 1
+            if int_val not in rv:
+                rv.append(int_val)
+
+    return rv
+
+def parse_token(token):
+    values = strip_split(token, '-')
+    num_values = len(values)
+
+    if num_values not in [1, 2]:
+        print >> sys.stderr, 'Error: "%s" is not a valid range' % token
+        sys.exit(1)
+
+    int_list = []
+    for value in values:
+        if value:
+            int_val = as_int(value)
+
+            if int_val < 1:
+                print >> sys.stderr, 'Error: "%s" is not >= 1' % value
+                sys.exit(1)
+
+            int_list.append(int_val)
+        else:
+            print >> sys.stderr, 'Error: "%s" is not a valid range' % token
+            sys.exit(1)
+
+    if num_values == 1:
+        return int_list
+
+    a, b = int_list
+
+    if a <= b:
+        return range(a, b+1)
+    else:
+        return range(a, b-1, -1)
+
+def strip_split(string, delim):
+    return [elem.strip() for elem in string.split(delim)]
+
+def as_int(string):
+    try:
+        val = int(string)
+    except:
+        print >> sys.stderr, 'Error: "%s" does not appear to be an integer' % string
+        sys.exit(1)
+    return val
+
+def get_lines(filename):
+    rv = []
+
+    fh = open(filename)
+    for line in fh:
+        line = line.rstrip('\r\n')
+        rv.append(line)
+    fh.close()
+
+    return rv
+
+def reorder(old_lines, new_order, filename):
+    max_index = len(old_lines) - 1
+
+    fh = open(filename, 'w')
+
+    for index in new_order:
+        if index <= max_index:
+            print >> fh, old_lines[index]
+            old_lines[index] = None
+
+    for line in old_lines:
+        if line is not None:
+            print >> fh, line
+
+    fh.close()
+
+# Script entry: expects exactly three arguments -- the input file, the output
+# file, and the range list describing the new order.
+if len(sys.argv) != 4:
+    print >> sys.stderr, "Usage"
+    sys.exit(1)
+
+input, output, order_string = sys.argv[1:]
+
+# Turn the 1-based range list into 0-based line indexes, read the input lines,
+# then write them out with the listed lines first.
+new_order = parse_rangelist(order_string)
+old_lines = get_lines(input)
+reorder(old_lines, new_order, output)
+
+sys.exit(0)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/reorder.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,78 @@
+<tool id="gd_reorder" name="Reorder individuals" version="1.0.0">
+  <description>: exchange rows in the above picture</description>
+
+  <command interpreter="python">
+    reorder.py '$input' '$output' '$order'
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="gd_indivs" label="Individuals dataset" />
+    <param name="order" size="40" type="text" value="" label="New order"/>
+  </inputs>
+
+  <outputs>
+    <data name="output" format="gd_indivs" metadata_source="input"/>
+  </outputs>
+
+  <help>
+**Dataset formats**
+
+The input and output datasets are in gd_indivs_ format.
+
+.. _gd_indivs: ./static/formatHelp.html#gd_indivs
+
+-----
+
+**What it does**
+
+The user picks a gd_indivs dataset from their history and specifies
+a new ordering.  This tool creates a new gd_indivs dataset with the
+individuals reordered as specified by the user.
+
+The new ordering is a list of comma-separated ranges (e.g. **5,6-12,20**).
+A range can be either a single number (e.g. **3**) or two dash-separated
+numbers (e.g. **3-5**).  The numbers are line numbers in the gd_indivs
+dataset.  Lines that are not listed will appear in the output after the
+specified lines, in their original relative order.
+
+-----
+
+**Example**
+
+Input dataset (six rows)::
+
+   18  McClintock
+   22  Peltonen-Palotie
+   26  Sager
+   30  Franklin
+   34  Auerbach
+   38  Stevens
+
+new ordering "**1,3-4**" will return::
+
+   18  McClintock
+   26  Sager
+   30  Franklin
+   22  Peltonen-Palotie
+   34  Auerbach
+   38  Stevens
+
+new ordering "**3,5,1,6**" will return::
+
+   26  Sager
+   34  Auerbach
+   18  McClintock
+   38  Stevens
+   22  Peltonen-Palotie
+   30  Franklin
+
+new ordering "**3-1,6,4-5**" will return::
+
+   26  Sager
+   22  Peltonen-Palotie
+   18  McClintock
+   38  Stevens
+   30  Franklin
+   34  Auerbach
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/restore_attributes.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,75 @@
+<tool id="gd_restore_attributes" name="Restore Attributes" version="1.1.0">
+  <description>: Fill in missing properties for a gd_snp or gd_genotype dataset</description>
+
+  <command interpreter="python">
+    cp.py "$dst" "$output"
+  </command>
+
+  <inputs>
+    <conditional name="input_type">
+      <param name="choice" type="select" format="integer" label="Input format">
+        <option value="0" selected="true">gd_snp</option>
+        <option value="1">gd_genotype</option>
+      </param>
+
+      <when value="0">
+        <param name="input" type="data" format="gd_snp" label="SNP dataset to copy attributes from" />
+        <param name="dst" type="data" format="gd_snp" label="SNP dataset to receive attributes" />
+      </when>
+      <when value="1">
+        <param name="input" type="data" format="gd_genotype" label="Genotype dataset to copy attributes from" />
+        <param name="dst" type="data" format="gd_genotype" label="Genotype dataset to receive attributes" />
+      </when>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <data name="output" format="input" format_source="input" metadata_source="input" />
+  </outputs>
+
+  <help>
+
+**Dataset formats**
+
+All of the input and output datasets are in gd_snp_ or gd_genotype_ format.  (`Dataset missing?`_)
+
+.. _gd_snp: ./static/formatHelp.html#gd_snp
+.. _gd_genotype: ./static/formatHelp.html#gd_genotype
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+This tool copies metadata information from one SNP dataset to another, leaving
+the actual SNP data itself unchanged.  Datasets in gd_snp format have a number
+of "extra" properties associated with them, such as the focus species (which
+may be different from the reference assembly), names of individuals, column
+numbers containing certain data fields, etc.  These values are stored in the
+dataset's metadata, in addition to the more usual attributes like dataset name,
+assembly build, and so forth.  You can see some of these by clicking on the
+pencil icon for the dataset.
+
+The Genome Diversity tools need this information to perform their tasks.
+However, these additional attributes may be lost if the datatype is changed.
+For example, suppose you want to see which SNPs overlap some other dataset in
+your history, like coding regions or TAL1 binding sites.  The Intersect tool
+only works on datasets that are in interval format, so you might use the Compute
+tool to append a new column with the End position of the SNP (= Start + 1),
+then use the pencil icon to change the datatype to "interval".  This works
+great for doing the intersection, but if you then want to run one of the Genome
+Diversity tools on the resulting SNPs, there's a problem: you can change the
+datatype back to gd_snp easily enough, but the extra attributes have been lost
+in the conversion to interval.
+
+As long as the proper values of the lost attributes have not changed, then this
+tool can restore them by copying from the old gd_snp dataset in your history.
+In the above example, appending a column does not change the numbering of the
+earlier columns, and deleting rows via Intersect does not affect the extra
+attributes either.  Note that all of the metadata is copied, not just the extra
+attributes specific to gd_snp (though standard items like the assembly build,
+the number of lines, and the name for the output dataset are updated
+automatically by the Galaxy framework).
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/rtrnKEGGpthwfENSEMBLTc.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+#       rtrnKEGGpthwfENSEMBLTc.py
+#
+#       Copyright 2011 Oscar Bedoya-Reina <oscar@niska.bx.psu.edu>
+#
+#       This program is free software; you can redistribute it and/or modify
+#       it under the terms of the GNU General Public License as published by
+#       the Free Software Foundation; either version 2 of the License, or
+#       (at your option) any later version.
+#
+#       This program is distributed in the hope that it will be useful,
+#       but WITHOUT ANY WARRANTY; without even the implied warranty of
+#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#       GNU General Public License for more details.
+#
+#       You should have received a copy of the GNU General Public License
+#       along with this program; if not, write to the Free Software
+#       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+#       MA 02110-1301, USA.
+
+import argparse,os,sys
+
+
+def main():
+	parser = argparse.ArgumentParser(description='Adds the fields KEGG gene codes and KEGG pathways to an input table of ENSEMBL transcript codes.')
+	parser.add_argument('--loc_file',metavar='correlational database',type=str,help='correlational database')
+	parser.add_argument('--species',metavar='species name',type=str,help='the species of interest in loc_file')
+	parser.add_argument('--output',metavar='output TXT file',type=str,help='the output file with the table in txt format. The output will have two more fields: KEGG gene codes and KEGG pathways of each ENSEMBL code' )
+	parser.add_argument('--posENSEMBLclmn',metavar='column number',type=int,help='the column with the ENSEMBLE transcript code')
+	parser.add_argument('--input',metavar='input TXT file',type=str,help='the input file with the table in txt format')
+	#~
+	#~Open arguments
+	class C(object):
+		pass
+	fulargs=C()
+	parser.parse_args(sys.argv[1:],namespace=fulargs)
+	#test input vars
+	inputf,loc_file,species,output,posENSEMBLclmn=fulargs.input,fulargs.loc_file,fulargs.species,fulargs.output,fulargs.posENSEMBLclmn
+	posENSEMBLclmn-=1#correct pos
+	#~ Get the extra variables
+	crDB=[x.split() for x in open(loc_file).read().splitlines() if x.split()[0]==species][0]
+	sppPrefx,dinput=crDB[0],crDB[1]#X should be replaced by the position in which the Conversion Dictionary File (CDF) is placed
+	#make a dictionary of the input CDF
+	dKEGGcPthws=dict([(x.split('\t')[0],'\t'.join(x.split('\t')[1:])) for x in open(dinput).read().splitlines() if x.strip()])
+	#~ add the two new columns
+	sall=[]
+	#lENSEMBLTc=[x.split('\t') for x in open(inputf).read().splitlines() if x.strip()]
+	lENSEMBLTc = []
+	with open(inputf) as fh:
+	    for line in fh:
+	        if line.startswith('#'):
+	            continue
+	        lENSEMBLTc.append(line.rstrip('\r\n').split('\t'))
+	nLines=len(lENSEMBLTc)
+	cLines=0
+	sall=[]#the output list for with additional fields
+	#~
+	while cLines<nLines:
+		cLines+=1
+		lENSEMBLTcKEGGgKEGGpth=lENSEMBLTc.pop(0)
+		ENSEMBLTc=lENSEMBLTcKEGGgKEGGpth[posENSEMBLclmn]
+		try:
+			KEGGgKEGGpth=dKEGGcPthws[ENSEMBLTc]
+		except:
+			KEGGgKEGGpth='\t'.join(['U','N'])
+		sall.append('\t'.join(['\t'.join(lENSEMBLTcKEGGgKEGGpth),KEGGgKEGGpth]))
+	#~
+	salef=open(output,'w')
+	salef.write('\n'.join(sall))
+	salef.close()
+	return 0
+
+
+if __name__ == '__main__':
+	main()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/select_snps.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,153 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import math
+from optparse import OptionParser
+import genome_diversity as gd
+
+def main_function(parse_arguments=None):
+    if parse_arguments is None:
+        parse_arguments = lambda arguments: (None, arguments)
+    def main_decorator(to_decorate):
+        def decorated_main(arguments=None):
+            if arguments is None:
+                arguments = sys.argv
+            options, arguments = parse_arguments(arguments)
+            sys.exit(to_decorate(options, arguments))
+        return decorated_main
+    return main_decorator
+
+def parse_arguments(arguments):
+    parser = OptionParser()
+    parser.add_option('--input', dest='input')
+    parser.add_option('--output', dest='output')
+    parser.add_option('--index_dir', dest='index_dir')
+    parser.add_option('--num_snps', dest='num_snps')
+    parser.add_option('--ref_chrom_col', dest='ref_chrom_col')
+    parser.add_option('--ref_pos_col', dest='ref_pos_col')
+    parser.add_option('--ref_species', dest='ref_species')
+    return parser.parse_args(arguments[1:])
+
+@main_function(parse_arguments)
+def main(options, arguments):
+
+    ref_chrom_idx = to_int( options.ref_chrom_col ) -1
+    ref_pos_idx = to_int( options.ref_pos_col ) -1
+
+    if (ref_chrom_idx < 1) or (ref_pos_idx < 1) or (ref_chrom_idx == ref_pos_idx):
+        print >> sys.stderr, "Cannot locate reference genome sequence (ref) or reference genome position (rPos) column for this dataset."
+        sys.exit(1)
+
+    chrom_len_root = os.path.join( options.index_dir, 'shared/ucsc/chrom')
+    chrom_len_file = '%s.len' % options.ref_species
+    chrom_len_path = os.path.join(chrom_len_root, chrom_len_file)
+
+    chrlens = gd.ChrLens( chrom_len_path )
+
+    total_len = 0
+    for chrom in chrlens:
+        total_len += chrlens.length(chrom)
+
+    total_requested = int( options.num_snps )
+    lines, data, comments = get_snp_lines_data_and_comments( options.input, ref_chrom_idx, ref_pos_idx )
+    selected = select_snps( data, total_len, total_requested )
+    out_data = fix_selection_and_order_like_input(data, selected, total_requested)
+    write_selected_snps( options.output, out_data, lines, comments )
+
+def to_int( value ):
+    try:
+        int_value = int( value )
+    except ValueError:
+        int_value = 0
+    return int_value
+
+def get_snp_lines_data_and_comments( filename, chrom_idx, pos_idx ):
+    fh = open( filename, 'r' )
+    if (chrom_idx >= pos_idx):
+        needed = chrom_idx + 1
+    else:
+        needed = pos_idx + 1
+    lines = []
+    data = []
+    comments = []
+    line_idx = 0
+    line_num = 0
+    for line in fh:
+        line_num += 1
+        line = line.rstrip('\r\n')
+        if line:
+            if line.startswith('#'):
+                comments.append(line)
+            else:
+                elems = line.split('\t')
+                if len(elems) >= needed:
+                    chrom = elems[chrom_idx]
+                    try:
+                        pos = int(elems[pos_idx])
+                    except ValueError:
+                        sys.stderr.write( "bad reference position in line %d column %d: %s\n" % ( line_num, pos_idx+1, elems[pos_idx] ) )
+                        sys.exit(1)
+                    lines.append(line)
+                    chrom_sort = chrom.lstrip('chr')
+                    data.append( [chrom_sort, chrom, pos, line_num, line_idx] )
+                    line_idx += 1
+    fh.close()
+    data = sorted( data, key=lambda x: (x[0], x[2]) )
+    return lines, data, comments
+
+def select_snps( data, total_len, requested ):
+    old_chrom = None
+    next_print = 0
+    selected = []
+    space = total_len / requested
+    for data_idx, datum in enumerate( data ):
+        chrom = datum[1]
+        pos = datum[2]
+        if chrom != old_chrom:
+            old_chrom = chrom
+            next_print = 0
+        if pos >= next_print:
+            selected.append(data_idx)
+            next_print += space
+    return selected
+
+def fix_selection_and_order_like_input(data, selected, requested):
+    total_selected = len( selected )
+    a = float( total_selected ) / requested
+    b = a / 2
+
+    idx_list = []
+    for i in range( requested ):
+        idx = int( math.ceil( i * a + b ) - 1 )
+        idx_list.append( idx )
+
+    out_data = []
+
+    for i, data_idx in enumerate(selected):
+        if total_selected > requested:
+            if i in idx_list:
+                out_data.append(data[data_idx])
+        else:
+            out_data.append(data[data_idx])
+
+    out_data = sorted( out_data, key=lambda x: x[3] )
+
+    return out_data
+
+def write_selected_snps( filename, data, lines, comments ):
+    fh = open( filename, 'w' )
+
+    for comment in comments:
+        fh.write("%s\n" % comment )
+
+    for datum in data:
+        line_idx = datum[4]
+        fh.write("%s\n" % lines[line_idx])
+
+    fh.close()
+
+if __name__ == "__main__":
+    main()
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/select_snps.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,99 @@
+<tool id="gd_select_snps" name="Sample SNPs" version="1.0.0">
+  <description>: Select a specified number of SNPs, uniformly spaced</description>
+
+  <command interpreter="python">
+    select_snps.py "--input=$input" "--output=$output" "--index_dir=$GALAXY_DATA_INDEX_DIR" "--num_snps=$num_snps"
+    #if $override_metadata.choice == "0":
+      "--ref_chrom_col=${input.metadata.ref}" "--ref_pos_col=${input.metadata.rPos}" "--ref_species=${input.metadata.dbkey}"
+    #else
+      "--ref_chrom_col=$ref_col" "--ref_pos_col=$rpos_col" "--ref_species=$ref_species"
+    #end if
+  </command>
+
+  <inputs>
+    <param format="tabular" name="input" type="data" label="SNP dataset">
+      <validator type="unspecified_build" message="This dataset does not have a reference species and cannot be used with this tool" />
+    </param>
+    <param name="num_snps" type="integer" value="10" optional="false" min="1" label="Number of SNPs"/>
+    <conditional name="override_metadata">
+      <param name="choice" type="select" format="integer" label="Choose columns" help="Datasets in gd_snp format have the column information in the metadata, all others must be chosen." >
+        <option value="0" selected="true">No, get columns from metadata</option>
+        <option value="1" >Yes, choose columns</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <param name="ref_col" type="data_column" data_ref="input" numerical="false" label="Column with reference chromosome"/>
+        <param name="rpos_col" type="data_column" data_ref="input" numerical="true" label="Column with reference position"/>
+        <param name="ref_species" type="select" label="Choose reference species">
+          <options from_file="gd.ref_species.txt">
+            <column name="name" index="1"/>
+            <column name="value" index="0"/>
+          </options>
+        </param>
+      </when>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <data format="gd_snp" name="output" metadata_source="input"/>
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp"/>
+      <param name="num_snps" value="100"/>
+      <param name="choice" value="0"/>
+      <output name="output" file="test_out/select_snps/select_snps.gd_snp" />
+    </test>
+  </tests>
+
+
+  <help>
+
+**Dataset formats**
+
+The input and output datasets are in tabular_ format.
+(`Dataset missing?`_)
+
+.. _tabular: ./static/formatHelp.html#tab
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+This tool attempts to select a specified number of SNPs from the dataset, making
+them approximately uniformly spaced relative to the reference genome. The number
+actually selected may be slightly more than the specified number.
+
+-----
+
+**Example**
+
+- input (gd_snp format)::
+
+    chr2_75111355_75112576    314  A  C  L  F  chr2   75111676  C  F  15  4  53   2   9  48   Y  96   0.369  0.355  0.396  0
+    chr8_93901796_93905612   2471  A  C  A  A  chr8   93904264  A  A  8   0  51   10  2  14   Y  961  0.016  0.534  0.114  2
+    chr10_7434473_7435447    524   T  C  S  S  chr10  7435005   T  S  11  5  90   14  0  69   Y  626  0.066  0.406  0.727  0
+    chr14_80021455_80022064  138   G  A  H  H  chr14  80021593  G  H  14  0  69   9   6  124  Y  377  0.118  0.997  0.195  1
+    chr15_64470252_64471048  89    G  A  Y  Y  chr15  64470341  G  Y  5   6  109  14  0  69   Y  312  0.247  0.998  0.393  0
+    chr18_48070585_48071386  514   C  T  E  K  chr18  48071100  T  K  7   7  46   14  0  69   Y  2    0.200  0.032  0.163  0
+    chr18_50154905_50155664  304   A  G  Y  C  chr18  50155208  A  Y  4   2  17   5   1  22   Y  8    0.022  0.996  0.128  0
+    chr18_57379354_57380496  315   C  T  V  V  chr18  57379669  G  V  11  0  60   9   6  62   Y  726  0.118  0.048  0.014  1
+    chr19_14240610_14242055  232   C  T  A  V  chr19  14240840  C  A  18  8  56   15  5  42   Y  73   0.003  0.153  0.835  0
+    chr19_39866997_39874915  3117  C  T  P  P  chr19  39870110  C  P  3   7  65   14  2  32   Y  6    0.321  0.911  0.462  4
+    etc.
+
+- output::
+
+    chr2_75111355_75112576    314  A  C  L  F  chr2   75111676  C  F  15  4  53   2   9  48   Y  96   0.369  0.355  0.396  0
+    chr8_93901796_93905612   2471  A  C  A  A  chr8   93904264  A  A  8   0  51   10  2  14   Y  961  0.016  0.534  0.114  2
+    chr10_7434473_7435447    524   T  C  S  S  chr10  7435005   T  S  11  5  90   14  0  69   Y  626  0.066  0.406  0.727  0
+    chr14_80021455_80022064  138   G  A  H  H  chr14  80021593  G  H  14  0  69   9   6  124  Y  377  0.118  0.997  0.195  1
+    chr15_64470252_64471048  89    G  A  Y  Y  chr15  64470341  G  Y  5   6  109  14  0  69   Y  312  0.247  0.998  0.393  0
+    chr18_48070585_48071386  514   C  T  E  K  chr18  48071100  T  K  7   7  46   14  0  69   Y  2    0.200  0.032  0.163  0
+    chr19_14240610_14242055  232   C  T  A  V  chr19  14240840  C  A  18  8  56   15  5  42   Y  73   0.003  0.153  0.835  0
+    etc.
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/specify.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+
+import gd_util
+import sys
+from Population import Population
+
+################################################################################
+
+def parse_string(str_arg, ind_token2col):
+    columns = []
+
+    string = gd_util.unwrap_string(str_arg)
+    tokens = find_tokens(string, ind_token2col)
+
+    for token in tokens:
+        col = ind_token2col[token]
+        if col not in columns:
+            columns.append(col)
+
+    return columns
+
+def find_tokens(string, tokens):
+    rv = []
+    for token in tokens:
+        if token in string:
+            if token not in rv:
+                rv.append(token)
+    return rv
+
+################################################################################
+
+# Script entry: expects exactly five arguments -- the input dataset, the
+# output file, and three encoded arguments (all individuals, the checked
+# individuals, and a free-text string).
+if len(sys.argv) != 6:
+    gd_util.die('Usage')
+
+# NOTE(review): 'input' is unpacked but not read anywhere in this script.
+input, output, ind_arg, cb_arg, str_arg = sys.argv[1:]
+
+# Population built from ind_arg
+p_total = Population()
+p_total.from_wrapped_dict(ind_arg)
+
+# Population built from cb_arg
+p_cb = Population()
+p_cb.from_wrapped_dict(cb_arg)
+
+if not p_total.is_superset(p_cb):
+    gd_util.die('There is a checked individual that does not appear in the SNP table')
+
+################################################################################
+
+# Build two lookups over p_total: column -> individual name, and first
+# whitespace separated word of the name -> column.  The first word must be
+# unique across individuals.
+ind_col2name = {}
+ind_token2col = {}
+for col in p_total.column_list():
+    individual = p_total.individual_with_column(col)
+    name = individual.name
+    ind_col2name[col] = name
+    first_token = name.split()[0]
+    if first_token not in ind_token2col:
+        ind_token2col[first_token] = col
+    else:
+        gd_util.die('duplicate first token: {0}'.format(first_token))
+
+# Columns chosen through p_cb, and columns whose token occurs in the string
+out_cols = p_cb.column_list()
+str_cols = parse_string(str_arg, ind_token2col)
+
+# Write one line per chosen column, in ascending column order: the column,
+# the individual's name, and an empty third field.
+with open(output, 'w') as fh:
+    for col in sorted(ind_col2name.keys()):
+        if col in out_cols or col in str_cols:
+            print >> fh, '\t'.join([str(x) for x in [col, ind_col2name[col], '']])
+
+sys.exit(0)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/specify.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,116 @@
+<tool id="gd_specify" name="Specify Individuals" version="1.1.0">
+  <description>: Define a collection of individuals from a gd_snp dataset</description>
+
+  <command interpreter="python">
+    #import json
+    #import base64
+    #import zlib
+    #set $ind_names = $input.dataset.metadata.individual_names
+    #set $ind_colms = $input.dataset.metadata.individual_columns
+    #set $ind_dict = dict(zip($ind_names, $ind_colms))
+    #set $ind_json = json.dumps($ind_dict, separators=(',',':'))
+    #set $ind_comp = zlib.compress($ind_json, 9)
+    #set $ind_arg = base64.b64encode($ind_comp)
+    #set $cb_string = str($individuals).strip()
+    #if $cb_string != 'None'
+      #set $cb_dict = dict.fromkeys($cb_string.split('\t'))
+      #for $cb_name in $cb_dict:
+        #set $cb_idx = $input.dataset.metadata.individual_names.index($cb_name)
+        #set $cb_dict[$cb_name] = str($input.dataset.metadata.individual_columns[$cb_idx])
+      #end for
+    #else
+      #set $cb_dict = dict()
+    #end if
+    #set $cb_json = json.dumps($cb_dict, separators=(',',':'))
+    #set $cb_comp = zlib.compress($cb_json, 9)
+    #set $cb_arg = base64.b64encode($cb_comp)
+    #set $str_string = str($string).strip()
+    #set $str_comp = zlib.compress($str_string, 9)
+    #set $str_arg = base64.b64encode($str_comp)
+    specify.py '$input' '$output' '$ind_arg' '$cb_arg' '$str_arg'
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="gd_snp,gd_genotype" label="SNP or Genotype dataset"/>
+    <param name="individuals" type="select" display="checkboxes" multiple="true" separator="&#9;" label="Individuals to include">
+      <options>
+        <filter type="data_meta" ref="input" key="individual_names" />
+      </options>
+    </param>
+    <param name="outname" type="text" size="20" label="Label for this collection">
+      <validator type="empty_field" message="You must enter a label."/>
+      <!-- used to be "Individuals from ${input.hid}" -->
+    </param>
+    <param name="string" type="text" area="true" size="5x40" label="Individuals to include">
+      <sanitizer>
+        <valid initial="string.printable"/>
+      </sanitizer>
+    </param>
+  </inputs>
+
+  <outputs>
+    <data name="output" format="gd_indivs" label="${outname}" />
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
+      <param name="individuals" value="PB1,PB2" />
+      <output name="output" file="test_in/a.gd_indivs" />
+    </test>
+  </tests>
+
+  <help>
+
+**Dataset formats**
+
+The input dataset is in gd_snp_ or gd_genotype_ format;
+the output is in gd_indivs_ format.  (`Dataset missing?`_)
+
+.. _gd_snp: ./static/formatHelp.html#gd_snp
+.. _gd_genotype: ./static/formatHelp.html#gd_genotype
+.. _gd_indivs: ./static/formatHelp.html#gd_indivs
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+This tool makes a list of selected entities, i.e., the sets of four
+columns representing individuals or groups from a gd_snp dataset, or
+sets of single columns in a gd_genotype file.  It does not copy the
+data; it just records which entities should be considered as belonging
+to some collection or population.  The label you specify is used to
+name the output dataset in your history.  This list can then be used
+to instruct other tools to work on just part of the original gd_snp or
+gd_genotype dataset.  The entities can be specified with the checklist
+and/or by pasting their names (possibly with extraneous characters, as
+in a portion of the Newick-format output of the Phylogenetic Tree tool)
+into the box provided at the bottom of the page.
+
+-----
+
+**Example**
+
+- input::
+
+   Contig161_chr1_4641264_4641879   115  C  T  73.5   chr1   4641382  C   6  0  2  45   8  0  2  51   15  0  2  72   5  0  2  42   6  0  2  45  10  0  2  57   Y  54  0.323  0
+   Contig48_chr1_10150253_10151311   11  A  G  94.3   chr1  10150264  A   1  0  2  30   1  0  2  30    1  0  2  30   3  0  2  36   1  0  2  30   1  0  2  30   Y  22  +99.   0
+   Contig20_chr1_21313469_21313570   66  C  T  54.0   chr1  21313534  C   4  0  2  39   4  0  2  39    5  0  2  42   4  0  2  39   4  0  2  39   5  0  2  42   N   1  +99.   0
+   etc.
+
+- input metadata::
+
+   #{"column_names":["scaf","pos","A","B","qual","ref","rpos","rnuc",
+   #"1A","1B","1G","1Q","2A","2B","2G","2Q","3A","3B","3G","3Q","4A","4B","4G","4Q","5A","5B","5G","5Q","6A","6B","6G","6Q",
+   #"pair","dist","prim","rflp"],"dbkey":"canFam2","individuals":[["PB1",9],["PB2",13],["PB3",17],["PB4",21],["PB6",25],["PB8",29]],
+   #"pos":2,"rPos":7,"ref":6,"scaffold":1,"species":"bear"}
+
+- output when individuals PB1, PB2, and PB3 are selected::
+
+   9   PB1
+   13  PB2
+   17  PB3
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/specify_restriction_enzymes.py	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+
+import os
+import sys
+from optparse import OptionParser
+import genome_diversity as gd
+
+def main_function( parse_arguments=None ):
+    if parse_arguments is None:
+        parse_arguments = lambda arguments: ( None, arguments )
+    def main_decorator( to_decorate ):
+        def decorated_main( arguments=None ):
+            if arguments is None:
+                arguments = sys.argv
+            options, arguments = parse_arguments( arguments )
+            rc = 1
+            try:
+                rc = to_decorate( options, arguments )
+            except Exception, err:
+                sys.stderr.write( 'ERROR: %s\n' % str( err ) )
+                traceback.print_exc()
+            finally:
+                sys.exit( rc )
+        return decorated_main
+    return main_decorator
+
+def parse_arguments( arguments ):
+    parser = OptionParser()
+    parser.add_option('--input',
+                        type='string', dest='input',
+                        help='file of selected SNPs')
+    parser.add_option('--output',
+                        type='string', dest='output',
+                        help='output file')
+    parser.add_option('--primers_loc',
+                        type='string', dest='primers_loc',
+                        help='primers .loc file')
+    parser.add_option('--scaffold_col',
+                        type="int", dest='scaffold_col',
+                        help='scaffold column in the input file')
+    parser.add_option('--pos_col',
+                        type="int", dest='pos_col',
+                        help='position column in the input file')
+    parser.add_option('--enzyme_list',
+                        type="string", dest='enzyme_list_string',
+                        help='comma separated list of enzymes')
+    parser.add_option('--species',
+                        type="string", dest='species',
+                        help='species')
+    return parser.parse_args( arguments[1:] )
+
+
+@main_function( parse_arguments )
+def main( options, arguments ):
+    if not options.input:
+        raise RuntimeError( 'missing --input option' )
+    if not options.output:
+        raise RuntimeError( 'missing --output option' )
+    if not options.primers_loc:
+        raise RuntimeError( 'missing --primers_loc option' )
+    if not options.scaffold_col:
+        raise RuntimeError( 'missing --scaffold_col option' )
+    if not options.pos_col:
+        raise RuntimeError( 'missing --pos_col option' )
+    if not options.enzyme_list_string:
+        raise RuntimeError( 'missing --enzyme_list option' )
+    if not options.species:
+        raise RuntimeError( 'missing --species option' )
+
+    snps = gd.SnpFile( filename=options.input, seq_col=int( options.scaffold_col ), pos_col=int( options.pos_col ) )
+
+    out_fh = gd._openfile( options.output, 'w' )
+
+    enzyme_dict = {}
+    for enzyme in options.enzyme_list_string.split( ',' ):
+        enzyme = enzyme.strip()
+        if enzyme:
+            enzyme_dict[enzyme] = 1
+
+    primer_data_file = gd.get_filename_from_loc( options.species, options.primers_loc )
+    file_root, file_ext = os.path.splitext( primer_data_file )
+    primer_index_file = file_root + ".cdb"
+    primers = gd.PrimersFile( data_file=primer_data_file, index_file=primer_index_file )
+
+    comments_printed = False
+
+    while snps.next():
+        seq, pos = snps.get_seq_pos()
+        enzyme_list = primers.get_enzymes( seq, pos )
+        for enzyme in enzyme_list:
+            if enzyme in enzyme_dict:
+                if not comments_printed:
+                    for comment in snps.comments:
+                        out_fh.write( "%s\n" % comment )
+                    comments_printed = True
+                out_fh.write( "%s\n" % snps.line )
+                break
+
+    out_fh.close()
+
+# Run the tool only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/specify_restriction_enzymes.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,101 @@
+<tool id="gd_specify_restriction_enzymes" name="Differential Cleavage" version="1.0.0">
+  <description>: Select SNPs differentially cut by specified restriction enzymes</description>
+
+  <command interpreter="python">
+    specify_restriction_enzymes.py "--input=$input" "--output=$output" "--primers_loc=${GALAXY_DATA_INDEX_DIR}/gd.primers.loc"
+    #if $override_metadata.choice == "0":
+      "--scaffold_col=${input.metadata.scaffold}" "--pos_col=${input.metadata.pos}" "--species=${input.metadata.species}"
+    #else
+      "--scaffold_col=$scaf_col" "--pos_col=$pos_col" "--species=$species"
+    #end if
+    "--enzyme_list=$enzymes"
+  </command>
+
+  <inputs>
+    <param format="tabular" name="input" type="data" label="SNP dataset"/>
+    <conditional name="override_metadata">
+      <param name="choice" type="select" format="integer" label="Choose columns" help="Datasets in gd_snp format have the columns in the metadata, all others need the columns chosen." >
+        <option value="0" selected="true">No, get columns from metadata</option>
+        <option value="1" >Yes, choose columns</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <param name="scaf_col" type="data_column" data_ref="input" numerical="false" label="Column with scaffold"/>
+        <param name="pos_col" type="data_column" data_ref="input" numerical="true" label="Column with position"/>
+        <param name="species" type="select" label="Choose species">
+          <options from_file="gd.species.txt">
+            <column name="name" index="1"/>
+            <column name="value" index="0"/>
+          </options>
+        </param>
+      </when>
+    </conditional>
+
+    <param name="enzymes" type="select" display="checkboxes" multiple="true" label="Choose enzymes">
+        <options from_file="gd.restriction_enzymes.txt">
+            <column name="name" index="0"/>
+            <column name="value" index="1"/>
+        </options>
+    </param>
+  </inputs>
+
+  <outputs>
+    <data format="gd_snp" name="output" metadata_source="input"/>
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="input" value="test_out/select_snps/select_snps.gd_snp" ftype="gd_snp" />
+      <param name="choice" value="0" />
+      <param name="enzymes" value="Bsp1286I,HaeII,RsaI" />
+      <output name="output" file="test_out/specify_restriction_enzymes/specify_restriction_enzymes.gd_snp" />
+    </test>
+  </tests>
+
+  <help>
+
+**Dataset formats**
+
+The input and output datasets are in tabular_ format.
+The input dataset must contain columns for scaffold or chromosome and position.
+(`Dataset missing?`_)
+
+.. _tabular: ./static/formatHelp.html#tab
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+It selects the SNPs that are differentially cut by at least one of the
+specified restriction enzymes. The enzymes are required to cut the amplified
+segment (for the specified PCR primers) only at the SNP.
+
+-----
+
+**Example**
+
+- input (gd_snp format)::
+
+    chr2_75111355_75112576    314  A  C  L  F  chr2   75111676  C  F  15  4  53   2   9  48   Y  96   0.369  0.355  0.396  0
+    chr8_93901796_93905612   2471  A  C  A  A  chr8   93904264  A  A  8   0  51   10  2  14   Y  961  0.016  0.534  0.114  2
+    chr10_7434473_7435447    524   T  C  S  S  chr10  7435005   T  S  11  5  90   14  0  69   Y  626  0.066  0.406  0.727  0
+    chr14_80021455_80022064  138   G  A  H  H  chr14  80021593  G  H  14  0  69   9   6  124  Y  377  0.118  0.997  0.195  1
+    chr15_64470252_64471048  89    G  A  Y  Y  chr15  64470341  G  Y  5   6  109  14  0  69   Y  312  0.247  0.998  0.393  0
+    chr18_48070585_48071386  514   C  T  E  K  chr18  48071100  T  K  7   7  46   14  0  69   Y  2    0.200  0.032  0.163  0
+    chr18_50154905_50155664  304   A  G  Y  C  chr18  50155208  A  Y  4   2  17   5   1  22   Y  8    0.022  0.996  0.128  0
+    chr18_57379354_57380496  315   C  T  V  V  chr18  57379669  G  V  11  0  60   9   6  62   Y  726  0.118  0.048  0.014  1
+    chr19_14240610_14242055  232   C  T  A  V  chr19  14240840  C  A  18  8  56   15  5  42   Y  73   0.003  0.153  0.835  0
+    chr19_39866997_39874915  3117  C  T  P  P  chr19  39870110  C  P  3   7  65   14  2  32   Y  6    0.321  0.911  0.462  4
+    etc.
+
+- output::
+
+    chr8_93901796_93905612   2471  A  C  A  A  chr8   93904264  A  A  8   0  51   10  2  14   Y  961  0.016  0.534  0.114  2
+    chr14_80021455_80022064  138   G  A  H  H  chr14  80021593  G  H  14  0  69   9   6  124  Y  377  0.118  0.997  0.195  1
+    chr18_57379354_57380496  315   C  T  V  V  chr18  57379669  G  V  11  0  60   9   6  62   Y  726  0.118  0.048  0.014  1
+    chr19_39866997_39874915  3117  C  T  P  P  chr19  39870110  C  P  3   7  65   14  2  32   Y  6    0.321  0.911  0.462  4
+    etc.
+
+  </help>
+</tool>
Binary file genome_diversity/static/images/cluster_kegg_formula.png has changed
Binary file genome_diversity/static/images/gd_coverage.png has changed
Binary file genome_diversity/static/images/gd_pathway_image.png has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_in/a.gd_indivs	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,2 @@
+9	PB1
+13	PB2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_in/b.gd_indivs	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,2 @@
+17	PB3
+21	PB4
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_in/c.gd_indivs	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,2 @@
+25	PB6
+29	PB8
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_in/ensembl.tabular	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,150 @@
+ENSCAFT00000000001
+ENSCAFT00000000144
+ENSCAFT00000000160
+ENSCAFT00000000215
+ENSCAFT00000000233
+ENSCAFT00000000365
+ENSCAFT00000000507
+ENSCAFT00000000517
+ENSCAFT00000000674
+ENSCAFT00000000724
+ENSCAFT00000000760
+ENSCAFT00000000762
+ENSCAFT00000001047
+ENSCAFT00000001052
+ENSCAFT00000001063
+ENSCAFT00000001076
+ENSCAFT00000001104
+ENSCAFT00000001141
+ENSCAFT00000001146
+ENSCAFT00000001204
+ENSCAFT00000001219
+ENSCAFT00000001250
+ENSCAFT00000001352
+ENSCAFT00000001363
+ENSCAFT00000001421
+ENSCAFT00000001523
+ENSCAFT00000001575
+ENSCAFT00000001587
+ENSCAFT00000001597
+ENSCAFT00000002056
+ENSCAFT00000002100
+ENSCAFT00000002110
+ENSCAFT00000002175
+ENSCAFT00000002259
+ENSCAFT00000002460
+ENSCAFT00000002537
+ENSCAFT00000002577
+ENSCAFT00000002578
+ENSCAFT00000002660
+ENSCAFT00000002792
+ENSCAFT00000002849
+ENSCAFT00000002999
+ENSCAFT00000003163
+ENSCAFT00000003223
+ENSCAFT00000003307
+ENSCAFT00000003515
+ENSCAFT00000003560
+ENSCAFT00000003644
+ENSCAFT00000003824
+ENSCAFT00000003840
+ENSCAFT00000004092
+ENSCAFT00000004103
+ENSCAFT00000004208
+ENSCAFT00000004253
+ENSCAFT00000004311
+ENSCAFT00000004464
+ENSCAFT00000004511
+ENSCAFT00000004609
+ENSCAFT00000004673
+ENSCAFT00000004726
+ENSCAFT00000004799
+ENSCAFT00000004933
+ENSCAFT00000004993
+ENSCAFT00000005126
+ENSCAFT00000005142
+ENSCAFT00000005225
+ENSCAFT00000005323
+ENSCAFT00000005467
+ENSCAFT00000005496
+ENSCAFT00000005518
+ENSCAFT00000005653
+ENSCAFT00000005746
+ENSCAFT00000005749
+ENSCAFT00000005832
+ENSCAFT00000005972
+ENSCAFT00000006025
+ENSCAFT00000006114
+ENSCAFT00000006157
+ENSCAFT00000006219
+ENSCAFT00000006272
+ENSCAFT00000006453
+ENSCAFT00000006479
+ENSCAFT00000006507
+ENSCAFT00000006669
+ENSCAFT00000006689
+ENSCAFT00000006827
+ENSCAFT00000006891
+ENSCAFT00000007130
+ENSCAFT00000007145
+ENSCAFT00000007244
+ENSCAFT00000007375
+ENSCAFT00000007440
+ENSCAFT00000007467
+ENSCAFT00000007484
+ENSCAFT00000007527
+ENSCAFT00000007553
+ENSCAFT00000007697
+ENSCAFT00000007703
+ENSCAFT00000007747
+ENSCAFT00000007774
+ENSCAFT00000007776
+ENSCAFT00000007779
+ENSCAFT00000007859
+ENSCAFT00000007951
+ENSCAFT00000007959
+ENSCAFT00000008012
+ENSCAFT00000008063
+ENSCAFT00000008142
+ENSCAFT00000008198
+ENSCAFT00000008413
+ENSCAFT00000008540
+ENSCAFT00000008586
+ENSCAFT00000008588
+ENSCAFT00000008673
+ENSCAFT00000008678
+ENSCAFT00000008728
+ENSCAFT00000008769
+ENSCAFT00000008831
+ENSCAFT00000009074
+ENSCAFT00000009114
+ENSCAFT00000009614
+ENSCAFT00000009698
+ENSCAFT00000009710
+ENSCAFT00000010094
+ENSCAFT00000010141
+ENSCAFT00000010439
+ENSCAFT00000010496
+ENSCAFT00000010516
+ENSCAFT00000010531
+ENSCAFT00000010559
+ENSCAFT00000010593
+ENSCAFT00000010616
+ENSCAFT00000010630
+ENSCAFT00000010829
+ENSCAFT00000010865
+ENSCAFT00000010931
+ENSCAFT00000010977
+ENSCAFT00000010988
+ENSCAFT00000011187
+ENSCAFT00000011380
+ENSCAFT00000011397
+ENSCAFT00000011721
+ENSCAFT00000011730
+ENSCAFT00000011771
+ENSCAFT00000011789
+ENSCAFT00000011968
+ENSCAFT00000012081
+ENSCAFT00000012133
+ENSCAFT00000012159
+ENSCAFT00000012254
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_in/sample.gd_sap	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,401 @@
+#{"column_names":["contig","pos","ref","rPos","trns","pep","AA1","loc","AA2","KEGG","pred","path"],"pos":2,"rPos":4,"ref":3,"dbkey":"canFam2","scaffold":1,"species":"bear","kegg_gene":10,"kegg_path":12}
+Contig39_chr1_3261104_3261850	414	chr1	3261546	ENSCAFT00000000001	ENSCAFP00000000001	S	667	F	476153	probably damaging	cfa00230=Purine metabolism.cfa00500=Starch and sucrose metabolism.cfa00740=Riboflavin metabolism.cfa00760=Nicotinate and nicotinamide metabolism.cfa00770=Pantothenate and CoA biosynthesis.cfa01100=Metabolic pathways
+Contig62_chr1_19011969_19012646	265	chr1	19012240	ENSCAFT00000000144	ENSCAFP00000000125	*	161	R	483960	probably damaging	N
+Contig36_chr1_20102654_20103213	365	chr1	20103029	ENSCAFT00000000160	ENSCAFP00000000140	R	407	Q	610160	possibly damaging	N
+Contig136_chr10_3710404_3714591	3079	chr10	3713499	ENSCAFT00000000215	ENSCAFP00000000194	T	103	P	U	benign	N
+Contig36_chr1_23682012_23682647	374	chr1	23682388	ENSCAFT00000000233	ENSCAFP00000000210	N	234	S	483973	benign	N
+Contig163_chr10_4573526_4574494	487	chr10	4574010	ENSCAFT00000000365	ENSCAFP00000000332	R	186	K	474414	benign	cfa00450=Selenocompound metabolism.cfa00970=Aminoacyl-tRNA biosynthesis
+Contig55_chr1_40056604_40059808	2081	chr1	40058686	ENSCAFT00000000507	ENSCAFP00000000458	I	247	K	484023	possibly damaging	N
+Contig17_chr1_40203628_40205630	1417	chr1	40205044	ENSCAFT00000000517	ENSCAFP00000000468	N	109	S	476233	benign	N
+Contig97_chr1_44847984_44848380	285	chr1	44848272	ENSCAFT00000000674	ENSCAFP00000000618	Q	27	R	611986	benign	N
+Contig214_chr10_16106753_16106969	121	chr10	16106873	ENSCAFT00000000724	ENSCAFP00000000668	A	301	T	609478	benign	N
+Contig75_chr1_45731970_45732932	436	chr1	45732397	ENSCAFT00000000760	ENSCAFP00000000701	I	490	V	U	benign	N
+Contig33_chr1_45614845_45617413	1835	chr1	45616685	ENSCAFT00000000760	ENSCAFP00000000701	A	4390	V	U	benign	N
+Contig95_chr10_18829724_18831056	914	chr10	18830645	ENSCAFT00000000762	ENSCAFP00000000703	A	512	V	U	possibly damaging	N
+Contig197_chr13_8622062_8623071	606	chr13	8622665	ENSCAFT00000001047	ENSCAFP00000000959	T	406	I	475067	possibly damaging	cfa00240=Pyrimidine metabolism.cfa00410=beta-Alanine metabolism.cfa00770=Pantothenate and CoA biosynthesis.cfa00983=Drug metabolism - other enzymes.cfa01100=Metabolic pathways
+Contig243_chr10_19959210_19960069	701	chr10	19959858	ENSCAFT00000001052	ENSCAFP00000000964	E	1345	K	U	benign	N
+Contig137_chr13_10622950_10624043	1039	chr13	10623979	ENSCAFT00000001063	ENSCAFP00000000975	E	10	K	481999	benign	N
+Contig137_chr13_10622950_10624043	1006	chr13	10623946	ENSCAFT00000001063	ENSCAFP00000000975	R	21	C	481999	probably damaging	N
+Contig115_chr12_4411478_4412322	124	chr12	4411614	ENSCAFT00000001076	ENSCAFP00000000986	R	177	H	U	benign	N
+Contig150_chr12_4438230_4439944	385	chr12	4438614	ENSCAFT00000001104	ENSCAFP00000001014	Y	277	D	607591	benign	N
+Contig84_chr1_52076858_52077103	80	chr1	52076943	ENSCAFT00000001141	ENSCAFP00000001046	C	147	Y	484064	benign	N
+Contig29_chr13_13215547_13217183	793	chr13	13216352	ENSCAFT00000001146	ENSCAFP00000001050	P	1	R	475076	probably damaging	N
+Contig251_chr10_22876556_22877097	152	chr10	22876714	ENSCAFT00000001204	ENSCAFP00000001103	E	1162	D	481203	benign	N
+Contig21_chr10_22964856_22965302	202	chr10	22965058	ENSCAFT00000001219	ENSCAFP00000001115	P	6	Q	474465	benign	N
+Contig199_chr12_5083018_5084534	453	chr12	5083472	ENSCAFT00000001250	ENSCAFP00000001144	I	185	T	481729.481731	benign	N.cfa04145=Phagosome.cfa04514=Cell adhesion molecules (CAMs).cfa04612=Antigen processing and presentation.cfa04672=Intestinal immune network for IgA production.cfa04940=Type I diabetes mellitus.cfa05140=Leishmaniasis.cfa05145=Toxoplasmosis.cfa05150=Staphylococcus aureus infection.cfa05152=Tuberculosis.cfa05164=Influenza A.cfa05166=HTLV-I infection.cfa05168=Herpes simplex infection.cfa05310=Asthma.cfa05320=Autoimmune thyroid disease.cfa05322=Systemic lupus erythematosus.cfa05323=Rheumatoid arthritis.cfa05330=Allograft rejection.cfa05332=Graft-versus-host disease.cfa05416=Viral myocarditis
+Contig41_chr13_21629998_21630487	161	chr13	21630157	ENSCAFT00000001352	ENSCAFP00000001239	P	729	S	482026	possibly damaging	cfa00565=Ether lipid metabolism
+Contig16_chr13_21786766_21788016	169	chr13	21786927	ENSCAFT00000001363	ENSCAFP00000001249	V	1142	A	475084	benign	cfa03022=Basal transcription factors
+Contig60_chr1_60333035_60333884	731	chr1	60333755	ENSCAFT00000001421	ENSCAFP00000001307	V	400	I	484096	benign	N
+Contig44_chr13_24555640_24556298	499	chr13	24556139	ENSCAFT00000001523	ENSCAFP00000001400	N	660	S	475088	benign	N
+Contig153_chr12_5955114_5958935	2950	chr12	5958094	ENSCAFT00000001575	ENSCAFP00000001449	E	13	D	481744	benign	cfa04141=Protein processing in endoplasmic reticulum
+Contig146_chr13_25076435_25077249	723	chr13	25077165	ENSCAFT00000001587	ENSCAFP00000001461	T	9	S	482035	benign	N
+Contig81_chr13_25579918_25582207	874	chr13	25580772	ENSCAFT00000001597	ENSCAFP00000001469	E	62	G	609411	benign	N
+Contig159_chr10_28604683_28606028	753	chr10	28605433	ENSCAFT00000002056	ENSCAFP00000001903	S	79	P	610014	benign	N
+Contig30_chr11_29945215_29949829	3973	chr11	29949181	ENSCAFT00000002100	ENSCAFP00000001944	M	282	T	U	benign	N
+Contig102_chr10_29039231_29041280	829	chr10	29040065	ENSCAFT00000002110	ENSCAFP00000001953	R	311	Q	481249	unknown	N
+Contig187_chr1_78583588_78584279	250	chr1	78583839	ENSCAFT00000002175	ENSCAFP00000002014	K	176	R	476310	benign	N
+Contig199_chr1_79234891_79237527	384	chr1	79235278	ENSCAFT00000002259	ENSCAFP00000002095	V	403	A	484151	benign	N
+Contig119_chr12_12212738_12214663	1005	chr12	12213720	ENSCAFT00000002460	ENSCAFP00000002280	R	749	Q	481785	possibly damaging	N
+Contig119_chr12_12212738_12214663	918	chr12	12213633	ENSCAFT00000002460	ENSCAFP00000002280	R	778	Q	481785	benign	N
+Contig39_chr14_10730123_10732539	335	chr14	10730462	ENSCAFT00000002537	ENSCAFP00000002356	V	1179	E	U	benign	N
+Contig41_chr1_84886710_84894794	3494	chr1	84890207	ENSCAFT00000002577	ENSCAFP00000002394	E	1089	K	484157	possibly damaging	N
+Contig182_chr12_13881114_13883427	1690	chr12	13882828	ENSCAFT00000002578	ENSCAFP00000002395	S	99	G	608906	benign	N
+Contig34_chr11_48151988_48152712	198	chr11	48152205	ENSCAFT00000002660	ENSCAFP00000002468	C	587	R	U	possibly damaging	N
+Contig37_chr10_34118256_34119269	437	chr10	34118687	ENSCAFT00000002792	ENSCAFP00000002588	A	377	T	474523	benign	N
+Contig21_chr14_16091274_16093278	716	chr14	16091997	ENSCAFT00000002849	ENSCAFP00000002642	R	126	C	475216	probably damaging	N
+Contig57_chr1_90983602_90984717	559	chr1	90984158	ENSCAFT00000002999	ENSCAFP00000002781	A	226	V	U	benign	N
+Contig45_chr12_15798569_15798849	141	chr12	15798709	ENSCAFT00000003163	ENSCAFP00000002938	N	342	S	474921	benign	cfa03040=Spliceosome
+Contig83_chr12_17852905_17859596	2392	chr12	17855305	ENSCAFT00000003223	ENSCAFP00000002995	E	770	Q	474925	benign	N
+Contig41_chr12_18725392_18725889	169	chr12	18725560	ENSCAFT00000003307	ENSCAFP00000003070	R	80	Q	609995	benign	N
+Contig9_chr14_26125779_26127414	486	chr14	26126264	ENSCAFT00000003515	ENSCAFP00000003259	P	123	T	482316	benign	N
+Contig132_chr1_101565951_101566612	255	chr1	101566210	ENSCAFT00000003560	ENSCAFP00000003298	L	588	F	U	unknown	N
+Contig142_chr1_102093954_102094392	121	chr1	102094072	ENSCAFT00000003644	ENSCAFP00000003373	K	120	E	484216	benign	cfa00290=Valine, leucine and isoleucine biosynthesis.cfa00970=Aminoacyl-tRNA biosynthesis
+Contig129_chr14_34071666_34074617	2313	chr14	34073957	ENSCAFT00000003824	ENSCAFP00000003537	T	282	I	475249	probably damaging	N
+Contig147_chr14_34262125_34262938	340	chr14	34262468	ENSCAFT00000003840	ENSCAFP00000003553	I	70	V	482333	benign	N
+Contig52_chr12_36031985_36035244	1237	chr12	36033208	ENSCAFT00000004092	ENSCAFP00000003784	Y	564	H	474960	benign	N
+Contig176_chr1_105494865_105495258	119	chr1	105494995	ENSCAFT00000004103	ENSCAFP00000003793	A	406	V	484298	benign	N
+Contig60_chr11_63130652_63131816	702	chr11	63131349	ENSCAFT00000004208	ENSCAFP00000003892	V	260	I	481637	benign	N
+Contig9_chr10_53579958_53582510	688	chr10	53580646	ENSCAFT00000004253	ENSCAFP00000003937	S	191	G	100534006.100534007.474588	benign	N
+Contig93_chr14_38451661_38452163	221	chr14	38451882	ENSCAFT00000004311	ENSCAFP00000003990	A	420	V	482346	benign	N
+Contig70_chr12_42859511_42860010	180	chr12	42859693	ENSCAFT00000004464	ENSCAFP00000004126	P	7	S	481892	possibly damaging	N
+Contig28_chr12_43447144_43449156	1136	chr12	43448279	ENSCAFT00000004511	ENSCAFP00000004169	V	582	M	481893	benign	N
+Contig18_chr13_62535238_62535697	227	chr13	62535471	ENSCAFT00000004609	ENSCAFP00000004263	E	277	D	611755	benign	N
+Contig282_chr1_108960925_108962235	205	chr1	108961141	ENSCAFT00000004673	ENSCAFP00000004325	A	149	V	611817	benign	N
+Contig110_chr1_109196028_109197290	987	chr1	109197021	ENSCAFT00000004726	ENSCAFP00000004374	E	330	D	610047	benign	cfa00230=Purine metabolism.cfa00240=Pyrimidine metabolism.cfa01100=Metabolic pathways.cfa03030=DNA replication.cfa03410=Base excision repair.cfa03420=Nucleotide excision repair.cfa03430=Mismatch repair.cfa03440=Homologous recombination.cfa05166=HTLV-I infection
+Contig89_chr11_69097905_69099099	568	chr11	69098443	ENSCAFT00000004799	ENSCAFP00000004445	E	1317	G	U	benign	N
+Contig118_chr14_46155051_46155557	173	chr14	46155218	ENSCAFT00000004933	ENSCAFP00000004572	S	110	L	482382	benign	cfa04621=NOD-like receptor signaling pathway.cfa05133=Pertussis
+Contig54_chr12_51910786_51912716	682	chr12	51911460	ENSCAFT00000004993	ENSCAFP00000004630	H	2889	Y	474995	benign	cfa03008=Ribosome biogenesis in eukaryotes
+Contig95_chr10_67698730_67699605	267	chr10	67698997	ENSCAFT00000005126	ENSCAFP00000004751	P	45	L	U	benign	N
+Contig265_chr17_3177908_3178389	332	chr17	3178241	ENSCAFT00000005142	ENSCAFP00000004763	A	306	P	606804	benign	N
+Contig322_chr17_4977962_4979371	1122	chr17	4979079	ENSCAFT00000005225	ENSCAFP00000004836	T	319	I	475647	possibly damaging	N
+Contig48_chr11_71453437_71456331	1725	chr11	71455160	ENSCAFT00000005323	ENSCAFP00000004927	A	226	V	U	benign	N
+Contig51_chr16_4789440_4790118	484	chr16	4789915	ENSCAFT00000005467	ENSCAFP00000005065	Q	318	H	U	benign	N
+Contig32_chr12_57224809_57225619	146	chr12	57224960	ENSCAFT00000005496	ENSCAFP00000005093	A	273	T	481925	benign	N
+Contig6_chr14_59310933_59312532	615	chr14	59311551	ENSCAFT00000005518	ENSCAFP00000005112	Y	304	H	492302	probably damaging	cfa02010=ABC transporters.cfa04971=Gastric acid secretion.cfa04972=Pancreatic secretion.cfa04976=Bile secretion
+Contig89_chr11_74391566_74395656	2856	chr11	74394408	ENSCAFT00000005653	ENSCAFP00000031395	R	450	H	403417	benign	cfa04145=Phagosome.cfa04620=Toll-like receptor signaling pathway.cfa05132=Salmonella infection.cfa05133=Pertussis.cfa05134=Legionellosis.cfa05140=Leishmaniasis.cfa05142=Chagas disease (American trypanosomiasis).cfa05144=Malaria.cfa05145=Toxoplasmosis.cfa05146=Amoebiasis.cfa05152=Tuberculosis.cfa05162=Measles.cfa05164=Influenza A.cfa05323=Rheumatoid arthritis
+Contig15_chr1_109713951_109714808	645	chr1	109714594	ENSCAFT00000005746	ENSCAFP00000005319	R	783	K	476410	benign	cfa00071=Fatty acid metabolism.cfa03320=PPAR signaling pathway.cfa04920=Adipocytokine signaling pathway
+Contig47_chr17_11258085_11259619	360	chr17	11258455	ENSCAFT00000005749	ENSCAFP00000005322	V	778	L	610007	benign	N
+Contig1_chr19_4352123_4352541	311	chr19	4352427	ENSCAFT00000005832	ENSCAFP00000005401	H	7	Y	403584	benign	cfa04060=Cytokine-cytokine receptor interaction.cfa04630=Jak-STAT signaling pathway.cfa04672=Intestinal immune network for IgA production.cfa05166=HTLV-I infection.cfa05168=Herpes simplex infection.cfa05323=Rheumatoid arthritis
+Contig57_chr12_66915864_66916357	337	chr12	66916199	ENSCAFT00000005972	ENSCAFP00000005534	F	1242	L	475012	benign	N
+Contig36_chr17_16182220_16182772	282	chr17	16182494	ENSCAFT00000006025	ENSCAFP00000005583	V	13	I	482980	possibly damaging	N
+Contig64_chr19_15052202_15053292	240	chr19	15052443	ENSCAFT00000006114	ENSCAFP00000005658	I	175	V	483829	benign	N
+Contig169_chr12_69415779_69417261	1136	chr12	69416908	ENSCAFT00000006157	ENSCAFP00000005701	D	85	N	475021	possibly damaging	N
+Contig200_chr18_15803806_15804082	169	chr18	15803976	ENSCAFT00000006219	ENSCAFP00000005760	A	66	V	483261	benign	cfa04972=Pancreatic secretion.cfa04978=Mineral absorption
+Contig6_chr18_15814044_15814404	97	chr18	15814150	ENSCAFT00000006219	ENSCAFP00000005760	A	413	S	483261	benign	cfa04972=Pancreatic secretion.cfa04978=Mineral absorption
+Contig104_chr1_110433641_110434230	183	chr1	110433810	ENSCAFT00000006272	ENSCAFP00000005811	A	315	T	484394	benign	cfa00280=Valine, leucine and isoleucine degradation.cfa00290=Valine, leucine and isoleucine biosynthesis.cfa00770=Pantothenate and CoA biosynthesis.cfa01100=Metabolic pathways
+Contig52_chr18_17851226_17851871	284	chr18	17851509	ENSCAFT00000006453	ENSCAFP00000005976	T	311	M	475893	probably damaging	N
+Contig63_chr16_12167721_12168304	388	chr16	12168099	ENSCAFT00000006479	ENSCAFP00000006000	M	634	V	U	benign	N
+Contig101_chr20_4702659_4703738	441	chr20	4703092	ENSCAFT00000006507	ENSCAFP00000006027	G	635	D	484622	probably damaging	cfa03030=DNA replication.cfa04110=Cell cycle
+Contig53_chr19_21456428_21457881	408	chr19	21456840	ENSCAFT00000006669	ENSCAFP00000006174	R	247	L	476094	possibly damaging	N
+Contig58_chr18_19883250_19884312	250	chr18	19883498	ENSCAFT00000006689	ENSCAFP00000006194	*	503	Y	475897	benign	N
+Contig122_chr15_17034758_17035049	142	chr15	17034893	ENSCAFT00000006827	ENSCAFP00000006320	R	117	P	U	benign	N
+Contig131_chr18_20356930_20357227	113	chr18	20357041	ENSCAFT00000006891	ENSCAFP00000006378	V	55	L	610021	benign	N
+Contig117_chr22_5859195_5860740	654	chr22	5859850	ENSCAFT00000007130	ENSCAFP00000006603	S	139	N	485445	benign	cfa04020=Calcium signaling pathway.cfa04080=Neuroactive ligand-receptor interaction
+Contig91_chr17_23506302_23507213	322	chr17	23506624	ENSCAFT00000007145	ENSCAFP00000006614	V	1644	I	607961	benign	N
+Contig3_chr21_16586556_16586852	105	chr21	16586661	ENSCAFT00000007244	ENSCAFP00000006709	C	33	Y	476781	possibly damaging	N
+Contig62_chr2_22645987_22646907	357	chr2	22646352	ENSCAFT00000007375	ENSCAFP00000006833	V	657	F	403767	probably damaging	cfa04977=Vitamin digestion and absorption
+Contig52_chr15_18032498_18034281	880	chr15	18033373	ENSCAFT00000007440	ENSCAFP00000006895	P	227	A	482516	benign	N
+Contig131_chr23_6679385_6679850	198	chr23	6679592	ENSCAFT00000007467	ENSCAFP00000006915	R	136	G	485576	possibly damaging	N
+Contig157_chr22_10584088_10586765	232	chr22	10584326	ENSCAFT00000007484	ENSCAFP00000006926	M	610	T	609336	benign	N
+Contig164_chr2_24336024_24340161	2420	chr2	24338436	ENSCAFT00000007527	ENSCAFP00000006969	S	824	C	607108	probably damaging	N
+Contig109_chr2_24557417_24558710	808	chr2	24558229	ENSCAFT00000007553	ENSCAFP00000006994	L	606	V	487123	benign	cfa03450=Non-homologous end-joining.cfa05340=Primary immunodeficiency
+Contig194_chr15_18573761_18574204	142	chr15	18573904	ENSCAFT00000007697	ENSCAFP00000007130	V	381	I	475382	benign	N
+Contig133_chr23_9924894_9925887	125	chr23	9925016	ENSCAFT00000007703	ENSCAFP00000007136	P	355	S	477019	benign	cfa03430=Mismatch repair.cfa03460=Fanconi anemia pathway.cfa05200=Pathways in cancer.cfa05210=Colorectal cancer.cfa05213=Endometrial cancer
+Contig31_chr23_10199273_10203629	4073	chr23	10203350	ENSCAFT00000007747	ENSCAFP00000007179	A	1844	V	U	benign	N
+Contig21_chr23_10308212_10309269	513	chr23	10308732	ENSCAFT00000007774	ENSCAFP00000007206	K	72	R	477021	benign	cfa04510=Focal adhesion.cfa04512=ECM-receptor interaction.cfa04514=Cell adhesion molecules (CAMs).cfa04810=Regulation of actin cytoskeleton.cfa05410=Hypertrophic cardiomyopathy (HCM).cfa05412=Arrhythmogenic right ventricular cardiomyopathy (ARVC).cfa05414=Dilated cardiomyopathy
+Contig211_chr1_114924893_114925515	171	chr1	114925067	ENSCAFT00000007776	ENSCAFP00000007208	P	1988	A	U	benign	N
+Contig35_chr2_27160577_27161526	804	chr2	27161367	ENSCAFT00000007779	ENSCAFP00000007211	G	473	R	478007.478008	probably damaging	cfa03060=Protein export.cfa04141=Protein processing in endoplasmic reticulum.cfa04145=Phagosome
+Contig79_chr17_24285444_24286769	1263	chr17	24286694	ENSCAFT00000007859	ENSCAFP00000007285	S	209	T	483010	benign	N
+Contig74_chr23_10871047_10871362	70	chr23	10871116	ENSCAFT00000007951	ENSCAFP00000007365	I	474	V	U	benign	N
+Contig34_chr16_18928689_18932806	3409	chr16	18932072	ENSCAFT00000007959	ENSCAFP00000007370	A	3754	S	482810.611087	benign	cfa00310=Lysine degradation
+Contig52_chr21_24452521_24454405	725	chr21	24453245	ENSCAFT00000008012	ENSCAFP00000007418	M	289	T	485173	possibly damaging	N
+Contig261_chr1_115563599_115564561	560	chr1	115564156	ENSCAFT00000008063	ENSCAFP00000007465	A	63	T	484489	possibly damaging	N
+Contig62_chr19_41037398_41039465	159	chr19	41037564	ENSCAFT00000008142	ENSCAFP00000007541	C	744	Y	476128	possibly damaging	N
+Contig84_chr1_115960693_115962811	1467	chr1	115962120	ENSCAFT00000008198	ENSCAFP00000007593	W	61	R	612489	benign	N
+Contig135_chr23_14160194_14160717	270	chr23	14160468	ENSCAFT00000008413	ENSCAFP00000007796	V	298	I	U	benign	N
+Contig41_chr17_26203621_26205196	1407	chr17	26205028	ENSCAFT00000008540	ENSCAFP00000007913	H	172	R	483021	benign	N
+Contig260_chr1_116076701_116078120	746	chr1	116077446	ENSCAFT00000008586	ENSCAFP00000007956	T	2486	I	484499	benign	N
+Contig19_chr23_14811332_14815323	1987	chr23	14813327	ENSCAFT00000008588	ENSCAFP00000007958	S	690	L	U	unknown	N
+Contig180_chr2_35061773_35062172	166	chr2	35061941	ENSCAFT00000008673	ENSCAFP00000008039	T	920	M	478018	probably damaging	N
+Contig106_chr21_26153874_26154496	107	chr21	26153984	ENSCAFT00000008678	ENSCAFP00000008044	A	458	T	485188	benign	N
+Contig3_chr19_45625337_45630123	2563	chr19	45627887	ENSCAFT00000008728	ENSCAFP00000008094	V	1264	I	U	benign	N
+Contig51_chr22_48760401_48761638	636	chr22	48761047	ENSCAFT00000008769	ENSCAFP00000008132	R	1071	K	485523	benign	cfa02010=ABC transporters.cfa04976=Bile secretion
+Contig10_chr15_21173640_21174011	212	chr15	21173839	ENSCAFT00000008831	ENSCAFP00000008192	V	191	I	475398	benign	N
+Contig6_chr24_14680423_14681438	782	chr24	14681208	ENSCAFT00000009074	ENSCAFP00000008417	H	562	R	485769	possibly damaging	cfa04330=Notch signaling pathway
+Contig60_chr9_4528464_4529207	262	chr9	4528727	ENSCAFT00000009114	ENSCAFP00000008453	C	24	F	483354	possibly damaging	N
+Contig54_chr15_29510545_29512205	400	chr15	29510955	ENSCAFT00000009614	ENSCAFP00000008928	H	190	R	475416	benign	N
+Contig46_chr25_5067588_5068089	39	chr25	5067627	ENSCAFT00000009698	ENSCAFP00000009003	S	17	N	486001	benign	N
+Contig126_chr25_5114359_5115799	643	chr25	5114996	ENSCAFT00000009710	ENSCAFP00000009013	R	1952	C	486002	possibly damaging	N
+Contig41_chr26_3455305_3455893	329	chr26	3455620	ENSCAFT00000010094	ENSCAFP00000009363	S	909	A	486223	benign	cfa00230=Purine metabolism.cfa00240=Pyrimidine metabolism.cfa01100=Metabolic pathways.cfa03030=DNA replication.cfa03410=Base excision repair.cfa03420=Nucleotide excision repair.cfa05166=HTLV-I infection
+Contig55_chr26_3463883_3465235	1074	chr26	3464998	ENSCAFT00000010094	ENSCAFP00000009363	R	1273	S	486223	benign	cfa00230=Purine metabolism.cfa00240=Pyrimidine metabolism.cfa01100=Metabolic pathways.cfa03030=DNA replication.cfa03410=Base excision repair.cfa03420=Nucleotide excision repair.cfa05166=HTLV-I infection
+Contig63_chr26_3467460_3468420	195	chr26	3467661	ENSCAFT00000010094	ENSCAFP00000009363	E	1542	Q	486223	benign	cfa00230=Purine metabolism.cfa00240=Pyrimidine metabolism.cfa01100=Metabolic pathways.cfa03030=DNA replication.cfa03410=Base excision repair.cfa03420=Nucleotide excision repair.cfa05166=HTLV-I infection
+Contig13_chr16_32259141_32259752	344	chr16	32259472	ENSCAFT00000010141	ENSCAFP00000009407	I	326	T	482857	benign	cfa04360=Axon guidance
+Contig59_chr21_32994329_32995926	1195	chr21	32995538	ENSCAFT00000010439	ENSCAFP00000009680	H	230	R	610992	benign	N
+Contig39_chr20_24938452_24941620	1292	chr20	24939734	ENSCAFT00000010496	ENSCAFP00000009730	S	28	P	415126	benign	cfa04380=Osteoclast differentiation.cfa04916=Melanogenesis.cfa05200=Pathways in cancer.cfa05218=Melanoma
+Contig2_chr18_28546360_28546760	277	chr18	28546640	ENSCAFT00000010516	ENSCAFP00000009748	P	471	S	U	benign	N
+Contig23_chr20_25560598_25562858	928	chr20	25561520	ENSCAFT00000010531	ENSCAFP00000009762	T	749	I	484693	benign	N
+Contig209_chr18_28672330_28672791	376	chr18	28672689	ENSCAFT00000010559	ENSCAFP00000009790	A	33	D	483405	unknown	N
+Contig261_chr18_28694652_28696968	1808	chr18	28696427	ENSCAFT00000010559	ENSCAFP00000009790	P	1443	L	483405	possibly damaging	N
+Contig30_chr25_12008255_12009009	151	chr25	12008417	ENSCAFT00000010593	ENSCAFP00000009822	Q	151	H	U	benign	N
+Contig46_chr29_3065854_3067420	1265	chr29	3067078	ENSCAFT00000010616	ENSCAFP00000009842	V	3253	A	474176	benign	cfa03450=Non-homologous end-joining.cfa04110=Cell cycle
+Contig59_chr28_3755477_3757019	935	chr28	3756419	ENSCAFT00000010630	ENSCAFP00000009853	R	923	Q	486770	possibly damaging	N
+Contig90_chr29_6393993_6395503	951	chr29	6394948	ENSCAFT00000010829	ENSCAFP00000010033	Y	257	C	486944	benign	N
+Contig42_chr16_39015800_39016389	319	chr16	39016119	ENSCAFT00000010865	ENSCAFP00000010068	D	71	N	U	possibly damaging	N
+Contig95_chr21_34533214_34535079	1133	chr21	34534321	ENSCAFT00000010931	ENSCAFP00000010131	E	118	G	485368	benign	N
+Contig82_chr21_34524815_34525170	247	chr21	34525072	ENSCAFT00000010931	ENSCAFP00000010131	Q	499	R	485368	benign	N
+Contig32_chr24_22727492_22727986	147	chr24	22727648	ENSCAFT00000010977	ENSCAFP00000010173	P	278	L	U	possibly damaging	N
+Contig45_chr16_42405571_42406148	269	chr16	42405837	ENSCAFT00000010988	ENSCAFP00000010184	H	406	R	482891	benign	cfa04145=Phagosome
+Contig66_chr15_43321121_43321872	642	chr15	43321764	ENSCAFT00000011187	ENSCAFP00000010364	F	543	L	475441	benign	N
+Contig184_chr27_5103641_5104991	275	chr27	5103979	ENSCAFT00000011380	ENSCAFP00000010541	V	864	A	U	benign	N
+Contig88_chr17_39320200_39320765	204	chr17	39320404	ENSCAFT00000011397	ENSCAFP00000010558	S	1911	N	475750	benign	cfa04110=Cell cycle.cfa04114=Oocyte meiosis.cfa04120=Ubiquitin mediated proteolysis.cfa04914=Progesterone-mediated oocyte maturation.cfa05166=HTLV-I infection
+Contig8_chr16_47195242_47195504	193	chr16	47195429	ENSCAFT00000011721	ENSCAFP00000010862	S	4369	W	475621	benign	N
+Contig84_chr27_5882441_5882771	145	chr27	5882579	ENSCAFT00000011730	ENSCAFP00000010871	C	289	S	486534	benign	N
+Contig42_chr24_25316755_25317362	320	chr24	25317091	ENSCAFT00000011771	ENSCAFP00000010910	G	22	S	477193	benign	N
+Contig45_chr24_25318544_25319490	734	chr24	25319299	ENSCAFT00000011771	ENSCAFP00000010910	V	187	A	477193	benign	N
+Contig31_chr24_25434125_25435133	853	chr24	25434975	ENSCAFT00000011789	ENSCAFP00000010928	S	91	I	609978	benign	N
+Contig20_chr3_10579133_10580085	600	chr3	10579729	ENSCAFT00000011968	ENSCAFP00000011099	K	165	E	488881	benign	cfa00760=Nicotinate and nicotinamide metabolism.cfa04146=Peroxisome
+Contig45_chr2_54585564_54588038	1047	chr2	54586611	ENSCAFT00000012081	ENSCAFP00000011198	T	969	M	478082	benign	cfa04621=NOD-like receptor signaling pathway
+Contig156_chr1_122375741_122376035	168	chr1	122375904	ENSCAFT00000012133	ENSCAFP00000011248	R	628	K	611998	benign	N
+Contig153_chr1_124036982_124040108	1588	chr1	124038585	ENSCAFT00000012159	ENSCAFP00000011272	A	887	T	484609	benign	N
+Contig32_chr24_26900375_26900913	394	chr24	26900761	ENSCAFT00000012254	ENSCAFP00000011358	H	51	Y	U	benign	N
+Contig103_chr16_48829082_48829675	123	chr16	48829205	ENSCAFT00000012381	ENSCAFP00000011471	E	369	G	475632	possibly damaging	N
+Contig25_chr18_41490135_41493501	534	chr18	41490665	ENSCAFT00000012414	ENSCAFP00000011503	R	703	C	483489	probably damaging	cfa04520=Adherens junction.cfa04670=Leukocyte transendothelial migration
+Contig69_chr16_49314879_49317228	1810	chr16	49316689	ENSCAFT00000012456	ENSCAFP00000011541	P	431	L	475636	probably damaging	cfa00565=Ether lipid metabolism
+Contig71_chr17_42734055_42736474	2240	chr17	42736298	ENSCAFT00000012478	ENSCAFP00000011561	R	307	Q	483083	benign	cfa00830=Retinol metabolism
+Contig17_chr17_43378842_43379885	305	chr17	43379148	ENSCAFT00000012676	ENSCAFP00000011740	T	196	M	U	probably damaging	N
+Contig195_chr27_7047911_7049009	555	chr27	7048468	ENSCAFT00000012942	ENSCAFP00000011978	R	881	L	477608	benign	N
+Contig112_chr30_4254316_4256576	1478	chr30	4255785	ENSCAFT00000012974	ENSCAFP00000012007	V	2939	I	U	benign	N
+Contig43_chr20_39124486_39124798	114	chr20	39124607	ENSCAFT00000013097	ENSCAFP00000012118	G	325	R	607274	possibly damaging	N
+Contig96_chr16_55849292_55849592	194	chr16	55849494	ENSCAFT00000013360	ENSCAFP00000012363	A	41	S	482932	benign	cfa04060=Cytokine-cytokine receptor interaction.cfa04150=mTOR signaling pathway.cfa04510=Focal adhesion.cfa05200=Pathways in cancer.cfa05211=Renal cell carcinoma.cfa05212=Pancreatic cancer.cfa05219=Bladder cancer
+Contig91_chr17_51684551_51689453	4154	chr17	51688687	ENSCAFT00000013395	ENSCAFP00000012395	P	306	L	475784	benign	N
+Contig192_chr26_12794366_12794712	143	chr26	12794506	ENSCAFT00000014076	ENSCAFP00000013021	V	2478	I	477486	benign	N
+Contig191_chr31_30109152_30109760	212	chr31	30109363	ENSCAFT00000014113	ENSCAFP00000013055	A	1813	T	487735	probably damaging	N
+Contig116_chr24_29683980_29684819	101	chr24	29684079	ENSCAFT00000014115	ENSCAFP00000013057	R	836	C	485868	probably damaging	N
+Contig8_chr32_9413601_9414435	74	chr32	9413675	ENSCAFT00000014257	ENSCAFP00000013183	N	236	K	478452	probably damaging	cfa00270=Cysteine and methionine metabolism
+Contig90_chr21_43253791_43254774	189	chr21	43253974	ENSCAFT00000014325	ENSCAFP00000013248	I	758	V	U	benign	N
+Contig76_chr24_30292767_30294101	552	chr24	30293321	ENSCAFT00000014346	ENSCAFP00000013267	A	349	T	U	benign	N
+Contig21_chr25_37121451_37122072	177	chr25	37121616	ENSCAFT00000014616	ENSCAFP00000013518	V	157	L	486118	benign	N
+Contig15_chr36_6357141_6362626	5226	chr36	6362346	ENSCAFT00000014702	ENSCAFP00000013598	N	138	K	607626	possibly damaging	N
+Contig64_chr17_54734453_54734993	109	chr17	54734552	ENSCAFT00000014707	ENSCAFP00000013603	S	302	L	483124	benign	N
+Contig91_chr18_46134014_46136042	330	chr18	46134347	ENSCAFT00000014736	ENSCAFP00000013630	A	214	S	483635	benign	cfa00561=Glycerolipid metabolism.cfa00564=Glycerophospholipid metabolism.cfa01100=Metabolic pathways.cfa04070=Phosphatidylinositol signaling system
+Contig111_chr31_31237314_31238628	920	chr31	31238220	ENSCAFT00000014822	ENSCAFP00000013714	S	143	C	478408	benign	N
+Contig77_chr38_3502296_3503058	349	chr38	3502639	ENSCAFT00000015260	ENSCAFP00000014122	K	666	E	478932	benign	N
+Contig59_chr38_3998294_3999004	369	chr38	3998672	ENSCAFT00000015347	ENSCAFP00000014201	V	791	I	U	benign	N
+Contig123_chr31_34367825_34368648	664	chr31	34368468	ENSCAFT00000015534	ENSCAFP00000014373	H	204	Q	U	possibly damaging	N
+Contig43_chr30_11874641_11875130	198	chr30	11874850	ENSCAFT00000015654	ENSCAFP00000014488	R	3422	C	U	benign	N
+Contig9_chr20_40741488_40743247	1027	chr20	40742525	ENSCAFT00000015816	ENSCAFP00000014638	M	183	V	484744	benign	N
+Contig137_chr5_7048977_7051042	863	chr5	7049840	ENSCAFT00000015844	ENSCAFP00000014662	A	311	V	479391	benign	N
+Contig9_chr28_17675067_17680985	1564	chr28	17676618	ENSCAFT00000015971	ENSCAFP00000014772	R	515	P	477805	unknown	N
+Contig126_chr30_12286682_12287475	407	chr30	12287101	ENSCAFT00000016062	ENSCAFP00000014854	V	450	I	487517	benign	cfa00052=Galactose metabolism.cfa00500=Starch and sucrose metabolism.cfa01100=Metabolic pathways
+Contig127_chr30_12287497_12288447	608	chr30	12288095	ENSCAFT00000016062	ENSCAFP00000014854	T	495	M	487517	benign	cfa00052=Galactose metabolism.cfa00500=Starch and sucrose metabolism.cfa01100=Metabolic pathways
+Contig13_chr38_5058391_5058630	66	chr38	5058458	ENSCAFT00000016099	ENSCAFP00000014887	F	412	L	478943	benign	N
+Contig169_chr35_19985467_19986000	455	chr35	19985921	ENSCAFT00000016165	ENSCAFP00000014950	T	175	I	478733	benign	N
+Contig2_chr35_21794536_21795092	291	chr35	21794865	ENSCAFT00000016208	ENSCAFP00000014992	V	84	A	488238	benign	cfa00561=Glycerolipid metabolism.cfa00564=Glycerophospholipid metabolism.cfa01100=Metabolic pathways
+Contig141_chr26_19278751_19279229	364	chr26	19279128	ENSCAFT00000016284	ENSCAFP00000015064	N	29	S	404011	benign	cfa00564=Glycerophospholipid metabolism.cfa00565=Ether lipid metabolism.cfa00590=Arachidonic acid metabolism.cfa00591=Linoleic acid metabolism.cfa00592=alpha-Linolenic acid metabolism.cfa01100=Metabolic pathways.cfa04010=MAPK signaling pathway.cfa04270=Vascular smooth muscle contraction.cfa04370=VEGF signaling pathway.cfa04664=Fc epsilon RI signaling pathway.cfa04724=Glutamatergic synapse.cfa04730=Long-term depression.cfa04912=GnRH signaling pathway.cfa04972=Pancreatic secretion.cfa04975=Fat digestion and absorption.cfa05145=Toxoplasmosis
+Contig179_chr3_40781459_40782026	285	chr3	40781763	ENSCAFT00000016410	ENSCAFP00000015182	D	1174	N	488699	benign	N
+Contig237_chr21_53631024_53632458	203	chr21	53631227	ENSCAFT00000016459	ENSCAFP00000015227	C	47	W	403799	probably damaging	cfa04664=Fc epsilon RI signaling pathway.cfa05310=Asthma
+Contig186_chr2_71203100_71204111	202	chr2	71203303	ENSCAFT00000016485	ENSCAFP00000015250	S	188	T	478144	benign	cfa00330=Arginine and proline metabolism.cfa01100=Metabolic pathways
+Contig1_chr28_18779291_18780149	325	chr28	18779619	ENSCAFT00000016578	ENSCAFP00000015340	N	245	Y	U	probably damaging	N
+Contig166_chr4_77425871_77426835	797	chr4	77426667	ENSCAFT00000016670	ENSCAFP00000015429	D	115	G	479370	benign	cfa00970=Aminoacyl-tRNA biosynthesis
+Contig35_chr24_36806524_36807086	367	chr24	36806891	ENSCAFT00000016727	ENSCAFP00000015478	F	345	L	485910	benign	N
+Contig45_chr37_8610877_8611425	194	chr37	8611078	ENSCAFT00000016761	ENSCAFP00000015511	D	2849	N	488452	possibly damaging	N
+Contig39_chr28_19446540_19447838	1068	chr28	19447566	ENSCAFT00000016791	ENSCAFP00000015537	A	1596	E	U	benign	N
+Contig161_chr18_51013230_51015381	1494	chr18	51014735	ENSCAFT00000016827	ENSCAFP00000015571	L	977	V	475999	benign	N
+Contig25_chr28_19619108_19621267	1728	chr28	19620832	ENSCAFT00000016848	ENSCAFP00000034237	I	108	V	609723	benign	N
+Contig33_chr20_42063173_42064259	623	chr20	42063789	ENSCAFT00000017070	ENSCAFP00000015794	V	179	M	U	probably damaging	N
+Contig39_chr38_14681397_14682234	384	chr38	14681781	ENSCAFT00000017072	ENSCAFP00000015796	H	282	N	488593	unknown	N
+Contig6_chr32_27303975_27304541	425	chr32	27304407	ENSCAFT00000017178	ENSCAFP00000015896	S	354	T	610098	benign	N
+Contig173_chr38_17709765_17711029	179	chr38	17709941	ENSCAFT00000017240	ENSCAFP00000015955	G	464	R	U	benign	N
+Contig52_chr32_27452924_27453332	91	chr32	27452999	ENSCAFT00000017249	ENSCAFP00000015964	A	22	S	U	benign	N
+Contig319_chr34_14684259_14684663	353	chr34	14684613	ENSCAFT00000017314	ENSCAFP00000016025	R	5	Q	478632	benign	N
+Contig32_chr2_72269353_72269814	349	chr2	72269708	ENSCAFT00000017327	ENSCAFP00000016037	P	853	L	487317	possibly damaging	N
+Contig206_chr9_18720001_18720613	155	chr9	18720160	ENSCAFT00000017373	ENSCAFP00000016082	D	1621	E	480456	benign	cfa02010=ABC transporters
+Contig35_chr37_10562149_10562621	74	chr37	10562222	ENSCAFT00000017444	ENSCAFP00000016153	I	975	V	478858	benign	cfa04727=GABAergic synapse
+Contig1_chr30_12655575_12656916	370	chr30	12655947	ENSCAFT00000017777	ENSCAFP00000016457	L	639	M	608886	probably damaging	N
+Contig63_chr27_23738716_23739879	1131	chr27	23739850	ENSCAFT00000017892	ENSCAFP00000016566	P	642	L	486627	benign	N
+Contig44_chr28_28123120_28124627	1348	chr28	28124495	ENSCAFT00000017967	ENSCAFP00000016639	V	261	A	477827	benign	N
+Contig23_chrX_6416128_6417014	455	chrX	6416585	ENSCAFT00000018017	ENSCAFP00000016684	H	111	R	491733	possibly damaging	N
+Contig31_chr7_8282189_8286932	3631	chr7	8285875	ENSCAFT00000018057	ENSCAFP00000016724	L	655	P	490260	benign	N
+Contig318_chr6_8706066_8706350	76	chr6	8706142	ENSCAFT00000018106	ENSCAFP00000016769	K	318	N	607700	possibly damaging	cfa04062=Chemokine signaling pathway.cfa04145=Phagosome.cfa04380=Osteoclast differentiation.cfa04666=Fc gamma R-mediated phagocytosis.cfa04670=Leukocyte transendothelial migration.cfa05140=Leishmaniasis
+Contig36_chr32_33046881_33048369	1118	chr32	33047990	ENSCAFT00000018307	ENSCAFP00000016954	E	555	A	403657	benign	cfa04010=MAPK signaling pathway.cfa04012=ErbB signaling pathway.cfa04060=Cytokine-cytokine receptor interaction.cfa04144=Endocytosis.cfa04510=Focal adhesion.cfa04540=Gap junction.cfa04810=Regulation of actin cytoskeleton.cfa05160=Hepatitis C.cfa05200=Pathways in cancer.cfa05212=Pancreatic cancer.cfa05213=Endometrial cancer.cfa05214=Glioma.cfa05215=Prostate cancer.cfa05218=Melanoma.cfa05219=Bladder cancer.cfa05223=Non-small cell lung cancer
+Contig32_chr5_14476595_14477214	347	chr5	14476948	ENSCAFT00000018571	ENSCAFP00000017201	V	209	A	610296	benign	N
+Contig88_chr34_19031138_19031937	343	chr34	19031477	ENSCAFT00000018684	ENSCAFP00000017309	K	670	R	478645	benign	cfa00280=Valine, leucine and isoleucine degradation.cfa01100=Metabolic pathways
+Contig188_chr25_47927372_47928085	557	chr25	47927941	ENSCAFT00000018758	ENSCAFP00000017379	K	228	R	486167	benign	cfa00561=Glycerolipid metabolism.cfa00564=Glycerophospholipid metabolism.cfa01100=Metabolic pathways.cfa04070=Phosphatidylinositol signaling system
+Contig36_chr37_12924359_12924740	86	chr37	12924449	ENSCAFT00000018786	ENSCAFP00000017406	D	187	Y	608849	probably damaging	cfa00280=Valine, leucine and isoleucine degradation.cfa00350=Tyrosine metabolism.cfa00380=Tryptophan metabolism.cfa00750=Vitamin B6 metabolism.cfa00760=Nicotinate and nicotinamide metabolism.cfa00982=Drug metabolism - cytochrome P450.cfa01100=Metabolic pathways
+Contig3_chr34_19471626_19472377	337	chr34	19471956	ENSCAFT00000018788	ENSCAFP00000017408	R	239	Q	488096	possibly damaging	N
+Contig80_chr4_11155760_11156827	952	chr4	11156735	ENSCAFT00000018796	ENSCAFP00000017416	S	661	N	479204	benign	cfa00564=Glycerophospholipid metabolism.cfa04146=Peroxisome
+Contig56_chr8_7093747_7095987	683	chr8	7094428	ENSCAFT00000018813	ENSCAFP00000017431	P	126	R	490620	unknown	N
+Contig82_chr8_7111986_7114065	1351	chr8	7113329	ENSCAFT00000018871	ENSCAFP00000017488	R	608	H	480255	probably damaging	cfa00010=Glycolysis / Gluconeogenesis.cfa00020=Citrate cycle (TCA cycle).cfa00620=Pyruvate metabolism.cfa01100=Metabolic pathways.cfa03320=PPAR signaling pathway.cfa04910=Insulin signaling pathway.cfa04920=Adipocytokine signaling pathway.cfa04964=Proximal tubule bicarbonate reclamation
+Contig172_chr33_28585454_28586084	228	chr33	28585687	ENSCAFT00000018884	ENSCAFP00000017500	R	36	K	478584	benign	N
+Contig113_chr5_16682954_16684491	688	chr5	16683641	ENSCAFT00000018997	ENSCAFP00000017606	F	41	L	489360	benign	N
+Contig36_chr28_31449413_31452160	2111	chr28	31451506	ENSCAFT00000019041	ENSCAFP00000017650	P	252	H	477834	benign	cfa04144=Endocytosis
+Contig80_chr3_55628026_55628800	392	chr3	55628403	ENSCAFT00000019070	ENSCAFP00000017677	R	805	K	403913	benign	cfa00480=Glutathione metabolism.cfa01100=Metabolic pathways.cfa04614=Renin-angiotensin system.cfa04640=Hematopoietic cell lineage
+Contig99_chr7_11816365_11819255	806	chr7	11817201	ENSCAFT00000019101	ENSCAFP00000017707	C	305	G	490276	benign	N
+Contig114_chr4_12744102_12745318	148	chr4	12744256	ENSCAFT00000019279	ENSCAFP00000017880	I	700	V	U	benign	N
+Contig82_chr7_13056757_13058281	974	chr7	13057742	ENSCAFT00000019316	ENSCAFP00000017915	S	283	N	609933	benign	cfa00564=Glycerophospholipid metabolism
+Contig280_chr25_51367477_51367885	70	chr25	51367542	ENSCAFT00000019610	ENSCAFP00000018191	S	97	L	U	benign	N
+Contig35_chr20_43508791_43509352	460	chr20	43509254	ENSCAFT00000019627	ENSCAFP00000018204	V	77	A	608455	benign	cfa00190=Oxidative phosphorylation.cfa01100=Metabolic pathways.cfa04260=Cardiac muscle contraction.cfa05010=Alzheimer's disease.cfa05012=Parkinson's disease.cfa05016=Huntington's disease
+Contig36_chr20_43509362_43510980	1484	chr20	43510860	ENSCAFT00000019627	ENSCAFP00000018204	D	181	N	608455	benign	cfa00190=Oxidative phosphorylation.cfa01100=Metabolic pathways.cfa04260=Cardiac muscle contraction.cfa05010=Alzheimer's disease.cfa05012=Parkinson's disease.cfa05016=Huntington's disease
+Contig59_chr25_51807653_51809044	1064	chr25	51808739	ENSCAFT00000019760	ENSCAFP00000018330	R	235	K	U	benign	N
+Contig96_chr36_17712997_17714068	556	chr36	17713559	ENSCAFT00000019807	ENSCAFP00000018374	T	423	I	478789	benign	N
+Contig163_chr28_34927368_34929275	1128	chr28	34928486	ENSCAFT00000019866	ENSCAFP00000018425	A	2659	T	477850	benign	N
+Contig74_chr33_31230250_31230874	246	chr33	31230493	ENSCAFT00000019938	ENSCAFP00000018492	G	113	S	488016	probably damaging	N
+Contig130_chr7_15553315_15558308	3186	chr7	15556497	ENSCAFT00000020009	ENSCAFP00000018561	K	1513	N	U	benign	N
+Contig160_chr2_76816412_76817166	354	chr2	76816779	ENSCAFT00000020143	ENSCAFP00000018683	I	190	V	478173	benign	N
+Contig219_chr33_31871568_31871771	81	chr33	31871646	ENSCAFT00000020195	ENSCAFP00000018733	N	346	H	U	probably damaging	N
+Contig254_chr24_50001599_50001992	151	chr24	50001767	ENSCAFT00000020266	ENSCAFP00000018803	R	239	Q	U	benign	N
+Contig40_chr37_15283702_15285945	1908	chr37	15285621	ENSCAFT00000020408	ENSCAFP00000018937	A	809	G	U	unknown	N
+Contig59_chr20_43702094_43703358	450	chr20	43702540	ENSCAFT00000020438	ENSCAFP00000018965	S	217	A	U	benign	N
+Contig75_chr3_57465650_57466327	377	chr3	57466017	ENSCAFT00000020863	ENSCAFP00000019371	L	205	F	609716	probably damaging	N
+Contig155_chr2_79195879_79199423	2014	chr2	79197892	ENSCAFT00000021154	ENSCAFP00000019645	G	549	S	U	benign	N
+Contig155_chr2_79195879_79199423	3136	chr2	79199014	ENSCAFT00000021154	ENSCAFP00000019645	R	923	C	U	probably damaging	N
+Contig59_chr5_19784971_19787384	1310	chr5	19786293	ENSCAFT00000021222	ENSCAFP00000019707	V	171	I	479428.489393	benign	cfa03320=PPAR signaling pathway
+Contig41_chr30_14304605_14305465	206	chr30	14304816	ENSCAFT00000021612	ENSCAFP00000020069	A	157	G	U	benign	N
+Contig47_chr20_45043804_45044476	317	chr20	45044117	ENSCAFT00000021659	ENSCAFP00000020114	V	281	I	609323	benign	N
+Contig46_chr4_22849549_22849829	123	chr4	22849673	ENSCAFT00000021752	ENSCAFP00000020204	V	646	M	U	probably damaging	N
+Contig141_chr7_22360980_22361690	242	chr7	22361233	ENSCAFT00000021777	ENSCAFP00000020227	K	1862	R	U	unknown	N
+Contig59_chr30_14758622_14760653	1186	chr30	14759817	ENSCAFT00000021792	ENSCAFP00000020241	S	284	R	609256	benign	N
+Contig57_chr27_39696388_39698349	1026	chr27	39697428	ENSCAFT00000021846	ENSCAFP00000020293	Q	588	R	477699	benign	cfa04610=Complement and coagulation cascades
+Contig83_chr27_40151814_40153141	738	chr27	40152551	ENSCAFT00000022064	ENSCAFP00000020490	S	191	R	477702	benign	N
+Contig105_chr6_11901733_11904968	406	chr6	11902145	ENSCAFT00000022289	ENSCAFP00000020701	Y	55	H	479732	probably damaging	cfa04621=NOD-like receptor signaling pathway
+Contig43_chr36_25298890_25299602	235	chr36	25299132	ENSCAFT00000022319	ENSCAFP00000020728	E	11731	K	610299.610339	unknown	N
+Contig3_chr36_25193150_25202641	2802	chr36	25195983	ENSCAFT00000022319	ENSCAFP00000020728	I	30137	V	610299.610339	benign	N
+Contig585_chr3_61201332_61201904	139	chr3	61201468	ENSCAFT00000022529	ENSCAFP00000020918	L	97	V	479067	benign	cfa00071=Fatty acid metabolism.cfa03320=PPAR signaling pathway.cfa04920=Adipocytokine signaling pathway
+Contig1_chr20_46714929_46715937	434	chr20	46715327	ENSCAFT00000022571	ENSCAFP00000020958	A	18	P	484804	unknown	N
+Contig7_chr8_29376780_29378260	158	chr8	29376937	ENSCAFT00000022576	ENSCAFP00000020962	T	852	A	490678	benign	N
+Contig74_chr8_29656170_29657212	595	chr8	29656776	ENSCAFT00000022697	ENSCAFP00000021080	E	974	K	490682	possibly damaging	cfa04010=MAPK signaling pathway.cfa04012=ErbB signaling pathway.cfa04062=Chemokine signaling pathway.cfa04320=Dorso-ventral axis formation.cfa04510=Focal adhesion.cfa04540=Gap junction.cfa04630=Jak-STAT signaling pathway.cfa04650=Natural killer cell mediated cytotoxicity.cfa04660=T cell receptor signaling pathway.cfa04662=B cell receptor signaling pathway.cfa04664=Fc epsilon RI signaling pathway.cfa04722=Neurotrophin signaling pathway.cfa04810=Regulation of actin cytoskeleton.cfa04910=Insulin signaling pathway.cfa04912=GnRH signaling pathway.cfa05160=Hepatitis C.cfa05200=Pathways in cancer.cfa05211=Renal cell carcinoma.cfa05213=Endometrial cancer.cfa05214=Glioma.cfa05215=Prostate cancer.cfa05220=Chronic myeloid leukemia.cfa05221=Acute myeloid leukemia.cfa05223=Non-small cell lung cancer
+Contig45_chr4_25273541_25274402	608	chr4	25274121	ENSCAFT00000022760	ENSCAFP00000021140	S	30	F	479239	probably damaging	cfa04360=Axon guidance
+Contig96_chr37_26111249_26111450	108	chr37	26111364	ENSCAFT00000022884	ENSCAFP00000021256	K	260	R	478902	benign	cfa03450=Non-homologous end-joining
+Contig196_chr3_62434637_62435063	203	chr3	62434823	ENSCAFT00000022915	ENSCAFP00000021284	L	174	P	488785	benign	N
+Contig15_chr6_12238116_12239737	1287	chr6	12239420	ENSCAFT00000022961	ENSCAFP00000021328	E	165	K	479735	benign	N
+Contig175_chr5_27267391_27267870	57	chr5	27267451	ENSCAFT00000023032	ENSCAFP00000021395	N	1094	S	479450	benign	cfa04110=Cell cycle.cfa04115=p53 signaling pathway.cfa04210=Apoptosis.cfa05166=HTLV-I infection
+Contig110_chr20_47192181_47193618	93	chr20	47192262	ENSCAFT00000023054	ENSCAFP00000021407	A	308	P	484814	probably damaging	N
+Contig9_chr4_26730063_26730585	245	chr4	26730316	ENSCAFT00000023087	ENSCAFP00000021437	E	153	D	489044	benign	N
+Contig1_chr34_35420831_35421658	73	chr34	35420908	ENSCAFT00000023111	ENSCAFP00000021457	V	251	I	488144	benign	N
+Contig199_chr2_79696091_79697603	751	chr2	79696840	ENSCAFT00000023253	ENSCAFP00000021593	D	54	A	U	possibly damaging	N
+Contig146_chrX_38946913_38947473	307	chrX	38947225	ENSCAFT00000023268	ENSCAFP00000021608	L	160	V	612457	possibly damaging	N
+Contig63_chr9_23532151_23533554	1297	chr9	23533421	ENSCAFT00000023438	ENSCAFP00000021767	Q	279	R	490958	benign	N
+Contig89_chr5_32060784_32061151	293	chr5	32061079	ENSCAFT00000023913	ENSCAFP00000022199	W	106	*	489430	probably damaging	N
+Contig15_chr3_65640843_65642155	1100	chr3	65641942	ENSCAFT00000023933	ENSCAFP00000022218	V	383	A	479080	benign	N
+Contig49_chr26_33571748_33572620	689	chr26	33572452	ENSCAFT00000024062	ENSCAFP00000022339	R	478	W	486440	benign	N
+Contig96_chr20_48055741_48057197	524	chr20	48056259	ENSCAFT00000024100	ENSCAFP00000022374	R	172	Q	U	benign	N
+Contig104_chr20_48062263_48062546	210	chr20	48062492	ENSCAFT00000024100	ENSCAFP00000022374	V	775	G	U	probably damaging	N
+Contig33_chr37_28794567_28796956	2144	chr37	28796718	ENSCAFT00000024137	ENSCAFP00000022408	E	279	Q	488536	benign	N
+Contig24_chr7_32005266_32005660	212	chr7	32005479	ENSCAFT00000024154	ENSCAFP00000022424	T	92	M	U	probably damaging	N
+Contig174_chr18_56896461_56897594	274	chr18	56896734	ENSCAFT00000024637	ENSCAFP00000022858	V	157	L	483779	benign	cfa04130=SNARE interactions in vesicular transport
+Contig55_chr20_48811642_48812027	299	chr20	48811941	ENSCAFT00000024761	ENSCAFP00000022970	H	993	R	476678	benign	N
+Contig220_chr18_56925351_56927006	920	chr18	56926246	ENSCAFT00000024787	ENSCAFP00000022995	P	420	Q	476051	possibly damaging	cfa03022=Basal transcription factors.cfa05168=Herpes simplex infection
+Contig12_chr8_39044824_39045409	359	chr8	39045181	ENSCAFT00000024804	ENSCAFP00000023011	I	280	T	612894	possibly damaging	N
+Contig23_chr3_72567678_72570858	1313	chr3	72568976	ENSCAFT00000024846	ENSCAFP00000023051	L	298	P	488826	benign	N
+Contig190_chr7_35896301_35896811	232	chr7	35896528	ENSCAFT00000024892	ENSCAFP00000023095	R	3	L	480092	unknown	cfa00020=Citrate cycle (TCA cycle).cfa01100=Metabolic pathways.cfa05200=Pathways in cancer.cfa05211=Renal cell carcinoma
+Contig119_chr20_49114009_49114654	266	chr20	49114270	ENSCAFT00000024934	ENSCAFP00000023135	F	339	L	484849	benign	N
+Contig47_chr8_41487304_41487682	210	chr8	41487515	ENSCAFT00000025088	ENSCAFP00000023286	S	1743	L	490729	possibly damaging	N
+Contig67_chr2_84099157_84100880	345	chr2	84099493	ENSCAFT00000025109	ENSCAFP00000023307	I	60	L	U	benign	N
+Contig33_chr20_49727730_49730958	2192	chr20	49729935	ENSCAFT00000025308	ENSCAFP00000023495	T	448	R	U	probably damaging	N
+Contig33_chr20_49727730_49730958	2907	chr20	49730606	ENSCAFT00000025308	ENSCAFP00000023495	W	493	L	U	benign	N
+Contig93_chr8_42181027_42183022	694	chr8	42181716	ENSCAFT00000025462	ENSCAFP00000023641	L	782	P	U	benign	N
+Contig131_chrX_44937490_44940040	950	chrX	44938456	ENSCAFT00000025663	ENSCAFP00000023835	V	120	M	491894	benign	N
+Contig100_chrX_44915404_44918232	1832	chrX	44917224	ENSCAFT00000025663	ENSCAFP00000023835	R	1212	Q	491894	benign	N
+Contig100_chrX_44915404_44918232	920	chrX	44916331	ENSCAFT00000025663	ENSCAFP00000023835	L	1377	V	491894	benign	N
+Contig123_chr9_26132942_26133532	310	chr9	26133253	ENSCAFT00000025948	ENSCAFP00000024090	I	232	V	491022	benign	N
+Contig34_chr6_17772839_17773548	489	chr6	17773329	ENSCAFT00000026008	ENSCAFP00000024146	E	377	Q	U	benign	N
+Contig382_chr7_43383655_43383893	190	chr7	43383854	ENSCAFT00000026053	ENSCAFP00000024188	R	123	C	U	possibly damaging	N
+Contig163_chr2_87404548_87404792	132	chr2	87404673	ENSCAFT00000026251	ENSCAFP00000024378	D	239	N	U	benign	N
+Contig15_chr3_91850893_91851323	75	chr3	91850967	ENSCAFT00000026343	ENSCAFP00000024465	S	722	N	595148	benign	cfa04360=Axon guidance
+Contig141_chr7_44385686_44386047	166	chr7	44385857	ENSCAFT00000026393	ENSCAFP00000024510	L	166	P	490412	benign	cfa04810=Regulation of actin cytoskeleton
+Contig161_chr2_87840986_87841705	540	chr2	87841516	ENSCAFT00000026485	ENSCAFP00000024598	F	678	C	478233	probably damaging	cfa03018=RNA degradation
+Contig177_chr9_27497479_27498192	354	chr9	27497831	ENSCAFT00000026613	ENSCAFP00000024719	A	175	V	491046	possibly damaging	N
+Contig162_chr6_20156115_20157725	81	chr6	20156197	ENSCAFT00000026687	ENSCAFP00000024793	T	702	M	489923.489924.607168	benign	N
+Contig8_chr9_28287278_28288276	469	chr9	28287755	ENSCAFT00000026707	ENSCAFP00000024813	A	75	P	491060	benign	N
+Contig166_chr7_45276673_45277595	235	chr7	45276916	ENSCAFT00000026881	ENSCAFP00000024984	V	525	I	490428	benign	N
+Contig16_chr8_51223078_51223662	481	chr8	51223563	ENSCAFT00000026967	ENSCAFP00000025070	R	869	Q	490790	benign	N
+Contig65_chr9_29792446_29793465	893	chr9	29793341	ENSCAFT00000027073	ENSCAFP00000025173	S	81	A	491082	benign	N
+Contig175_chr6_30926774_30927470	446	chr6	30927229	ENSCAFT00000027269	ENSCAFP00000025361	S	663	T	403453	benign	cfa02010=ABC transporters.cfa04977=Vitamin digestion and absorption
+Contig45_chr30_33024389_33025619	471	chr30	33024857	ENSCAFT00000027320	ENSCAFP00000025407	G	986	A	487608	benign	N
+Contig60_chr20_53087461_53088013	184	chr20	53087649	ENSCAFT00000027519	ENSCAFP00000025591	S	556	L	611163	benign	N
+Contig98_chr5_37073086_37073674	378	chr5	37073467	ENSCAFT00000027596	ENSCAFP00000025664	V	38	M	479499	probably damaging	cfa04130=SNARE interactions in vesicular transport
+Contig64_chr9_36235086_36235751	475	chr9	36235563	ENSCAFT00000027673	ENSCAFP00000025737	D	260	E	491111	benign	cfa04970=Salivary secretion
+Contig72_chr30_35330469_35330831	236	chr30	35330709	ENSCAFT00000027712	ENSCAFP00000025770	G	386	C	478353	probably damaging	cfa04510=Focal adhesion.cfa04512=ECM-receptor interaction.cfa04810=Regulation of actin cytoskeleton.cfa05410=Hypertrophic cardiomyopathy (HCM).cfa05412=Arrhythmogenic right ventricular cardiomyopathy (ARVC).cfa05414=Dilated cardiomyopathy
+Contig12_chr8_66066327_66066629	89	chr8	66066402	ENSCAFT00000027927	ENSCAFP00000025970	K	158	R	490836	benign	N
+Contig212_chr8_66173086_66174259	622	chr8	66173712	ENSCAFT00000027950	ENSCAFP00000025993	K	114	Q	480421	benign	N
+Contig176_chr7_48083671_48084458	311	chr7	48083983	ENSCAFT00000027972	ENSCAFP00000026015	R	128	H	480148	probably damaging	N
+Contig3_chr4_58820541_58821952	265	chr4	58820806	ENSCAFT00000027979	ENSCAFP00000026022	A	31	T	489166	benign	N
+Contig24_chr7_48238665_48239174	383	chr7	48239049	ENSCAFT00000028007	ENSCAFP00000026049	T	227	M	480151	probably damaging	N
+Contig25_chr6_26340448_26341519	657	chr6	26341104	ENSCAFT00000028115	ENSCAFP00000026155	S	128	L	479811	possibly damaging	N
+Contig212_chr5_38871122_38871621	302	chr5	38871429	ENSCAFT00000028231	ENSCAFP00000026253	P	2265	L	489507	benign	N
+Contig147_chr6_27310627_27310983	100	chr6	27310719	ENSCAFT00000028327	ENSCAFP00000026344	V	154	A	U	benign	N
+Contig160_chr6_27318582_27318861	67	chr6	27318647	ENSCAFT00000028327	ENSCAFP00000026344	K	325	R	U	benign	N
+Contig18_chr4_61023435_61026038	385	chr4	61023825	ENSCAFT00000028363	ENSCAFP00000026377	P	4110	L	479323	benign	N
+Contig162_chr30_40685605_40687049	343	chr30	40685956	ENSCAFT00000028463	ENSCAFP00000026472	A	416	P	487646	benign	N
+Contig68_chr20_54017481_54018354	221	chr20	54017705	ENSCAFT00000028500	ENSCAFP00000026509	W	539	R	U	benign	N
+Contig50_chr7_59076761_59079381	2353	chr7	59079104	ENSCAFT00000028551	ENSCAFP00000026557	V	1487	I	490492	benign	N
+Contig51_chr7_59079274_59084588	2611	chr7	59081905	ENSCAFT00000028551	ENSCAFP00000026557	A	575	V	490492	benign	N
+Contig3_chr20_54855789_54856135	37	chr20	54855833	ENSCAFT00000028813	ENSCAFP00000026796	F	6015	S	U	unknown	N
+Contig157_chr5_43472186_43472528	168	chr5	43472353	ENSCAFT00000028826	ENSCAFP00000026807	R	355	Q	489526	benign	cfa00010=Glycolysis / Gluconeogenesis.cfa00340=Histidine metabolism.cfa00350=Tyrosine metabolism.cfa00360=Phenylalanine metabolism.cfa00410=beta-Alanine metabolism.cfa00980=Metabolism of xenobiotics by cytochrome P450.cfa00982=Drug metabolism - cytochrome P450.cfa01100=Metabolic pathways
+Contig80_chr20_55281094_55281971	129	chr20	55281228	ENSCAFT00000028936	ENSCAFP00000026914	T	931	A	U	benign	N
+Contig214_chr8_74493164_74493474	188	chr8	74493346	ENSCAFT00000029054	ENSCAFP00000027017	R	94	C	U	probably damaging	N
+Contig259_chr20_55571618_55572503	186	chr20	55571803	ENSCAFT00000029100	ENSCAFP00000027059	K	526	Q	485001	benign	N
+Contig180_chr9_41668066_41668716	357	chr9	41668451	ENSCAFT00000029122	ENSCAFP00000027081	E	990	D	491145	benign	cfa03410=Base excision repair
+Contig61_chr4_63087183_63089623	491	chr4	63087672	ENSCAFT00000029130	ENSCAFP00000027089	A	20	S	U	benign	N
+Contig261_chrX_94412915_94414298	488	chrX	94413396	ENSCAFT00000029188	ENSCAFP00000027142	D	329	E	U	unknown	N
+Contig58_chr4_70221679_70223505	1749	chr4	70223432	ENSCAFT00000029501	ENSCAFP00000027423	T	324	S	403721	benign	cfa04060=Cytokine-cytokine receptor interaction.cfa04080=Neuroactive ligand-receptor interaction.cfa04630=Jak-STAT signaling pathway
+Contig21_chr7_77985141_77986170	827	chr7	77985962	ENSCAFT00000029651	ENSCAFP00000027557	A	855	S	490545	benign	N
+Contig93_chrX_104176429_104177974	811	chrX	104177246	ENSCAFT00000029709	ENSCAFP00000027610	T	719	M	492128	benign	cfa03008=Ribosome biogenesis in eukaryotes
+Contig175_chr9_46116277_46118268	1090	chr9	46117366	ENSCAFT00000029722	ENSCAFP00000027622	Q	693	H	U	benign	N
+Contig134_chr4_76495667_76496825	860	chr4	76496507	ENSCAFT00000029827	ENSCAFP00000027720	I	113	V	612589	benign	cfa00250=Alanine, aspartate and glutamate metabolism.cfa00260=Glycine, serine and threonine metabolism.cfa01100=Metabolic pathways
+Contig247_chr6_31967574_31967796	158	chr6	31967732	ENSCAFT00000029875	ENSCAFP00000027765	P	750	T	489999	benign	N
+Contig6_chr7_81650872_81657348	3786	chr7	81654636	ENSCAFT00000030050	ENSCAFP00000027927	S	501	C	480218	benign	N
+Contig122_chr5_57147596_57148457	360	chr5	57147964	ENSCAFT00000030140	ENSCAFP00000028007	T	713	I	479558	benign	N
+Contig83_chr20_58039274_58039724	380	chr20	58039649	ENSCAFT00000030192	ENSCAFP00000028056	E	142	K	611866	benign	N
+Contig42_chr5_58023274_58024296	585	chr5	58023845	ENSCAFT00000030282	ENSCAFP00000028135	V	415	A	489580	benign	N
+Contig248_chr20_58217741_58219717	751	chr20	58218495	ENSCAFT00000030285	ENSCAFP00000028138	G	278	S	485038	unknown	N
+Contig127_chr6_39501489_39501966	83	chr6	39501576	ENSCAFT00000030381	ENSCAFP00000028228	N	155	S	490020	benign	N
+Contig123_chr6_39499974_39501056	816	chr6	39500798	ENSCAFT00000030381	ENSCAFP00000028228	A	195	P	490020	benign	N
+Contig247_chr6_39576694_39577607	493	chr6	39577171	ENSCAFT00000030386	ENSCAFP00000028233	S	745	N	490021	benign	N
+Contig6_chr9_50725202_50725646	143	chr9	50725344	ENSCAFT00000030726	ENSCAFP00000028560	M	12	T	491218	benign	N
+Contig221_chr6_41879771_41881379	766	chr6	41880519	ENSCAFT00000030883	ENSCAFP00000028717	A	184	T	606755	benign	N
+Contig231_chr5_60474911_60475630	279	chr5	60475186	ENSCAFT00000030960	ENSCAFP00000028794	C	505	Y	489618	possibly damaging	N
+Contig99_chr5_63306202_63308496	2063	chr5	63308224	ENSCAFT00000031146	ENSCAFP00000028978	A	421	V	U	unknown	N
+Contig245_chr5_66149146_66149848	349	chr5	66149499	ENSCAFT00000031407	ENSCAFP00000029234	R	207	Q	479601	benign	cfa00760=Nicotinate and nicotinamide metabolism.cfa01100=Metabolic pathways
+Contig305_chr5_67253589_67254394	375	chr5	67253954	ENSCAFT00000031570	ENSCAFP00000029391	R	203	Q	U	possibly damaging	N
+Contig94_chr9_56873843_56875505	1578	chr9	56875408	ENSCAFT00000031743	ENSCAFP00000029555	P	2937	S	U	benign	N
+Contig107_chr5_71317862_71318113	71	chr5	71317944	ENSCAFT00000031781	ENSCAFP00000029590	M	281	V	U	benign	N
+Contig134_chr9_57426140_57427208	236	chr9	57426380	ENSCAFT00000031798	ENSCAFP00000029606	V	89	I	480698	benign	cfa00590=Arachidonic acid metabolism.cfa01100=Metabolic pathways
+Contig60_chr12_5631507_5632392	818	chr12	5632313	ENSCAFT00000031814	ENSCAFP00000029621	Y	1697	C	481734	unknown	cfa04510=Focal adhesion.cfa04512=ECM-receptor interaction.cfa04974=Protein digestion and absorption.cfa05146=Amoebiasis
+Contig132_chr5_73710776_73711271	149	chr5	73710927	ENSCAFT00000031848	ENSCAFP00000029653	T	1323	M	489696	probably damaging	N
+Contig39_chr9_59278364_59279024	398	chr9	59278757	ENSCAFT00000032068	ENSCAFP00000029863	A	957	T	480718	benign	N
+Contig177_chr9_61212763_61213621	700	chr9	61213430	ENSCAFT00000032171	ENSCAFP00000029958	D	79	N	U	benign	N
+Contig67_chr6_62507717_62510152	1055	chr6	62508787	ENSCAFT00000032186	ENSCAFP00000029972	I	212	M	479959	probably damaging	cfa00380=Tryptophan metabolism.cfa00450=Selenocompound metabolism.cfa01100=Metabolic pathways
+Contig66_chr6_64570039_64570630	325	chr6	64570365	ENSCAFT00000032239	ENSCAFP00000030024	A	862	G	479964	benign	cfa04740=Olfactory transduction.cfa04972=Pancreatic secretion
+Contig22_chr6_64809414_64810661	626	chr6	64810027	ENSCAFT00000032269	ENSCAFP00000030052	Q	559	K	490179	benign	N
+Contig50_chr5_85052459_85052865	55	chr5	85052515	ENSCAFT00000032431	ENSCAFP00000030201	S	32	G	479688	benign	N
+Contig25_chr5_85095840_85098495	1627	chr5	85097474	ENSCAFT00000032433	ENSCAFP00000030203	F	681	S	U	benign	N
+Contig25_chr5_85480673_85480982	186	chr5	85480860	ENSCAFT00000032493	ENSCAFP00000030260	A	180	T	610026	possibly damaging	cfa05010=Alzheimer's disease
+Contig19_chr5_24601128_24602241	685	chr5	24601813	ENSCAFT00000035141	ENSCAFP00000030364	T	695	S	U	benign	N
+Contig59_chr26_11519273_11520242	659	chr26	11519937	ENSCAFT00000035276	ENSCAFP00000030520	P	160	L	403557	probably damaging	cfa03015=mRNA surveillance pathway.cfa04114=Oocyte meiosis.cfa04270=Vascular smooth muscle contraction.cfa04510=Focal adhesion.cfa04720=Long-term potentiation.cfa04728=Dopaminergic synapse.cfa04810=Regulation of actin cytoskeleton.cfa04910=Insulin signaling pathway.cfa05168=Herpes simplex infection
+Contig27_chr12_23130802_23131771	353	chr12	23131154	ENSCAFT00000035307	ENSCAFP00000030552	V	565	M	474935	probably damaging	N
+Contig31_chr1_8052327_8053606	234	chr1	8052570	ENSCAFT00000035442	ENSCAFP00000030703	C	153	S	U	possibly damaging	N
+Contig59_chr20_40539078_40540678	1223	chr20	40540302	ENSCAFT00000035532	ENSCAFP00000030804	H	285	R	403502	benign	cfa04620=Toll-like receptor signaling pathway.cfa05142=Chagas disease (American trypanosomiasis).cfa05143=African trypanosomiasis.cfa05144=Malaria.cfa05152=Tuberculosis.cfa05162=Measles.cfa05168=Herpes simplex infection
+Contig152_chr6_25356961_25358151	701	chr6	25357665	ENSCAFT00000035750	ENSCAFP00000031044	P	479	S	608555	benign	cfa04142=Lysosome
+Contig18_chr9_58576258_58576773	215	chr9	58576474	ENSCAFT00000035914	ENSCAFP00000031224	K	118	E	480706	benign	N
+Contig8_chr15_38734005_38734403	242	chr15	38734244	ENSCAFT00000035916	ENSCAFP00000031226	A	237	V	611996	possibly damaging	N
+Contig76_chr3_30625909_30626247	159	chr3	30626069	ENSCAFT00000036198	ENSCAFP00000031549	T	135	S	479171	benign	cfa00260=Glycine, serine and threonine metabolism.cfa00270=Cysteine and methionine metabolism.cfa01100=Metabolic pathways
+Contig86_chr37_14528768_14530343	873	chr37	14529628	ENSCAFT00000036570	ENSCAFP00000031969	V	738	D	478875.609202	possibly damaging	cfa04060=Cytokine-cytokine receptor interaction.cfa04350=TGF-beta signaling pathway
+Contig9_chr5_54124181_54125739	1134	chr5	54125291	ENSCAFT00000036640	ENSCAFP00000032043	A	187	T	610286	benign	N
+Contig107_chr9_8990420_8991676	1178	chr9	8991591	ENSCAFT00000036774	ENSCAFP00000032186	T	55	M	483288	benign	N
+Contig47_chr12_20319418_20320775	1212	chr12	20320622	ENSCAFT00000036825	ENSCAFP00000032241	K	606	T	474930	benign	cfa00280=Valine, leucine and isoleucine degradation.cfa00630=Glyoxylate and dicarboxylate metabolism.cfa00640=Propanoate metabolism.cfa01100=Metabolic pathways
+Contig4_chr2_45195542_45196115	233	chr2	45195785	ENSCAFT00000037022	ENSCAFP00000032463	D	833	N	478055	possibly damaging	N
+Contig8_chr8_77227029_77227651	339	chr8	77227366	ENSCAFT00000037096	ENSCAFP00000032544	T	61	A	490895.612602	benign	cfa04020=Calcium signaling pathway.cfa04145=Phagosome.cfa04640=Hematopoietic cell lineage.cfa04650=Natural killer cell mediated cytotoxicity.cfa04662=B cell receptor signaling pathway.cfa04664=Fc epsilon RI signaling pathway.cfa04666=Fc gamma R-mediated phagocytosis.cfa04672=Intestinal immune network for IgA production.cfa05140=Leishmaniasis.cfa05143=African trypanosomiasis.cfa05146=Amoebiasis.cfa05150=Staphylococcus aureus infection.cfa05152=Tuberculosis.cfa05162=Measles.cfa05310=Asthma.cfa05320=Autoimmune thyroid disease.cfa05322=Systemic lupus erythematosus.cfa05323=Rheumatoid arthritis.cfa05330=Allograft rejection.cfa05340=Primary immunodeficiency.cfa05414=Dilated cardiomyopathy.cfa05416=Viral myocarditis
+Contig2_chr7_60049092_60051693	266	chr7	60049361	ENSCAFT00000038176	ENSCAFP00000033857	T	195	M	U	probably damaging	N
+Contig31_chr30_24179816_24187402	4867	chr30	24184686	ENSCAFT00000038211	ENSCAFP00000033897	G	103	S	U	benign	N
+Contig9_chr27_48250956_48251793	192	chr27	48251161	ENSCAFT00000038256	ENSCAFP00000033944	T	166	M	477739	probably damaging	N
+Contig45_chr27_43537046_43537944	568	chr27	43537599	ENSCAFT00000038301	ENSCAFP00000033996	M	69	I	611773	benign	cfa04010=MAPK signaling pathway.cfa04810=Regulation of actin cytoskeleton.cfa05200=Pathways in cancer.cfa05218=Melanoma
+Contig133_chr18_28371600_28372547	83	chr18	28371695	ENSCAFT00000038383	ENSCAFP00000034090	L	102	Q	475933	probably damaging	N
+Contig11_chr28_8532951_8533892	511	chr28	8533462	ENSCAFT00000038937	ENSCAFP00000034728	R	19	C	477763	probably damaging	cfa03008=Ribosome biogenesis in eukaryotes.cfa03013=RNA transport
+Contig1_chr14_5733966_5735336	783	chr14	5734754	ENSCAFT00000039094	ENSCAFP00000034905	A	166	T	U	benign	N
+Contig48_chr27_6001075_6001818	392	chr27	6001478	ENSCAFT00000039109	ENSCAFP00000034919	R	103	H	U	probably damaging	N
+Contig40_chr11_43589173_43590288	973	chr11	43590138	ENSCAFT00000039148	ENSCAFP00000034962	R	1617	P	481557	benign	N
+Contig1_chr14_30424688_30425258	179	chr14	30424861	ENSCAFT00000039390	ENSCAFP00000035239	T	648	I	475245	benign	cfa04666=Fc gamma R-mediated phagocytosis.cfa04810=Regulation of actin cytoskeleton
+Contig58_chr8_7461111_7462065	323	chr8	7461423	ENSCAFT00000039451	ENSCAFP00000035309	L	112	F	U	benign	N
+Contig1_chr25_43094809_43095852	908	chr25	43095708	ENSCAFT00000039609	ENSCAFP00000035483	W	18	G	U	unknown	N
+Contig114_chr25_43076436_43076800	141	chr25	43076581	ENSCAFT00000039609	ENSCAFP00000035483	S	45	C	U	unknown	N
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_in/sample.gd_snp	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,402 @@
+#{"column_names":["scaf","pos","A","B","qual","ref","rpos","rnuc","1A","1B","1G","1Q","2A","2B","2G","2Q","3A","3B","3G","3Q","4A","4B","4G","4Q","5A","5B","5G","5Q","6A","6B","6G","6Q","pair","dist",
+#"prim","rflp"],"dbkey":"canFam2","individuals":[["PB1",9],["PB2",13],["PB3",17],["PB4",21],["PB6",25],["PB8",29]],"pos":2,"rPos":7,"ref":6,"scaffold":1,"species":"bear"}
+Contig161_chr1_4641264_4641879	115	C	T	73.5	chr1	4641382	C	6	0	2	45	8	0	2	51	15	0	2	72	5	0	2	42	6	0	2	45	10	0	2	57	Y	54	0.323	0
+Contig48_chr1_10150253_10151311	11	A	G	94.3	chr1	10150264	A	1	0	2	30	1	0	2	30	1	0	2	30	3	0	2	36	1	0	2	30	1	0	2	30	Y	22	+99.	0
+Contig20_chr1_21313469_21313570	66	C	T	54.0	chr1	21313534	C	4	0	2	39	4	0	2	39	5	0	2	42	4	0	2	39	4	0	2	39	5	0	2	42	N	1	+99.	0
+Contig86_chr1_30984450_30985684	670	C	T	365.0	chr1	30985133	C	9	0	2	54	10	0	2	57	13	0	2	66	3	0	2	36	9	0	2	54	7	0	2	48	Y	145	0.031	0
+Contig5_chr1_32562160_32563940	1215	G	T	163.0	chr1	32563356	G	17	0	2	78	19	0	2	84	20	0	2	87	14	0	2	69	12	0	2	63	10	0	2	57	Y	17	0.251	0
+Contig110_chr1_33385093_33386888	510	C	T	270.0	chr1	33385587	A	14	0	2	69	11	0	2	60	19	0	2	84	11	0	2	60	10	0	2	57	13	0	2	66	Y	13	0.126	0
+Contig100_chr1_33562920_33564288	743	C	T	178.0	chr1	33563655	C	6	0	2	45	10	0	2	57	8	0	2	51	5	0	2	42	13	0	2	66	7	0	2	48	Y	13	0.090	3
+Contig7_chr1_37302355_37302489	97	A	G	59.2	chr1	37302452	G	3	0	2	36	8	0	2	51	5	0	2	42	8	0	2	51	7	0	2	48	6	0	2	45	N	56	2.812	0
+Contig62_chr1_41880715_41882180	1078	T	G	57.6	chr1	41881785	T	14	0	2	69	15	0	2	72	16	0	2	75	13	0	2	66	8	0	2	51	10	0	2	57	Y	21	0.477	0
+Contig47_chr1_48409178_48409384	37	C	T	134.0	chr1	48409215	T	5	0	2	42	6	0	2	45	8	0	2	51	9	0	2	54	4	0	2	39	6	0	2	45	N	66	+99.	0
+Contig119_chr1_49647683_49650077	1618	C	A	99.7	chr1	49649276	A	8	0	2	51	11	0	2	60	10	0	2	57	9	0	2	54	10	0	2	57	14	0	2	69	Y	16	0.166	0
+Contig21_chr1_60697952_60699446	307	G	A	51.9	chr1	60698265	G	12	0	2	63	9	0	2	54	4	0	2	39	6	0	2	45	9	0	2	54	4	0	2	39	Y	98	0.507	0
+Contig131_chr1_62319542_62320564	169	C	G	103.0	chr1	62319709	C	12	0	2	63	12	0	2	66	14	0	2	69	12	0	2	63	9	0	2	54	9	0	2	54	Y	73	0.307	1
+Contig14_chr1_63450425_63450680	101	T	A	102.0	chr1	63450530	T	8	0	2	51	10	0	2	57	18	0	2	81	8	0	2	51	8	0	2	34	8	0	2	51	N	99	1.085	0
+Contig83_chr1_63869778_63869942	40	T	C	23.7	chr1	63869819	C	5	0	2	42	7	0	2	48	2	0	2	33	4	0	2	39	6	0	2	48	4	0	2	39	N	654	1.364	0
+Contig30_chr1_64702572_64703138	178	A	T	117.0	chr1	64702750	T	10	0	2	57	10	0	2	57	20	0	2	87	21	0	2	90	6	0	2	45	12	0	2	63	Y	50	3.872	0
+Contig101_chr1_69868406_69868872	287	G	A	14.6	chr1	69868689	G	13	0	2	66	17	0	2	78	10	0	2	57	8	0	2	51	7	0	2	48	8	0	2	51	N	137	0.305	0
+Contig35_chr1_74482577_74482791	170	G	A	45.4	chr1	74482751	A	3	0	2	36	4	0	2	39	13	0	2	66	2	0	2	33	5	0	2	42	2	0	2	33	N	20	+99.	3
+Contig49_chr1_83865731_83865944	85	G	A	34.1	chr1	-1	N	4	0	2	39	4	0	2	39	8	0	2	51	2	0	2	33	5	0	2	42	4	0	2	39	N	-1	1.485	0
+Contig64_chr1_87343284_87345672	163	T	A	3.76	chr1	87343443	C	0	2	2	1	0	0	-1	0	5	0	2	42	2	0	2	33	0	1	2	14	0	0	-1	0	N	3	0.039	2
+Contig20_chr1_110679280_110679687	181	C	T	87.4	chr1	110679454	-	1	0	2	30	7	0	2	48	4	0	2	39	2	0	2	33	2	0	2	33	0	0	-1	0	N	31	0.660	2
+Contig129_chr1_117547123_117548666	926	G	A	126.0	chr1	117548059	G	19	0	2	84	9	0	2	54	11	0	2	60	10	0	2	57	12	0	2	63	11	0	2	60	Y	64	0.049	0
+Contig7_chr1_125154638_125154844	190	G	T	130.0	chr1	125154818	A	5	0	2	42	4	0	2	39	7	0	2	48	2	0	2	33	7	0	2	48	4	0	2	39	N	33	+99.	0
+Contig222_chr2_9817738_9818143	220	C	T	888.0	chr2	9817960	C	17	0	2	78	12	0	2	63	20	0	2	87	8	0	2	51	11	0	2	60	12	0	2	63	Y	76	0.093	1
+Contig47_chr2_25470778_25471576	126	G	A	888.0	chr2	25470896	G	12	0	2	63	14	0	2	69	14	0	2	69	10	0	2	57	18	0	2	81	13	0	2	66	N	11	0.289	1
+Contig10_chr2_40859744_40860534	637	G	A	888.0	chr2	40860397	A	3	0	2	36	3	0	2	36	2	0	2	33	7	0	2	48	6	0	2	45	8	0	2	51	Y	42	1.435	0
+Contig52_chr2_41421981_41422725	604	C	A	888.0	chr2	41422583	A	17	0	2	78	18	0	2	81	14	0	2	69	17	0	2	78	12	0	2	63	14	0	2	69	Y	44	0.882	0
+Contig94_chr2_43869105_43870358	220	G	A	888.0	chr2	43869333	G	12	0	2	63	18	0	2	81	11	0	2	60	15	0	2	72	12	0	2	63	13	0	2	66	Y	1	0.156	0
+Contig34_chr2_48444129_48444939	695	C	T	134.0	chr2	48444828	C	14	0	2	69	8	0	2	51	16	0	2	75	17	0	2	78	9	0	2	54	15	0	2	72	Y	161	0.375	0
+Contig6_chr2_56859179_56859956	671	T	C	999.9	chr2	56859851	T	15	0	2	72	18	0	2	81	20	0	2	90	19	0	2	84	19	0	2	84	24	0	2	99	N	28	5.308	1
+Contig115_chr2_61631913_61632510	310	G	T	999.3	chr2	61632216	G	7	0	2	48	9	0	2	54	7	0	2	48	11	0	2	60	10	0	2	57	10	0	2	57	N	13	0.184	0
+Contig31_chr2_67331584_67331785	39	C	T	999.0	chr2	67331623	C	11	0	2	60	10	0	2	57	7	0	2	48	9	0	2	54	2	0	2	33	4	0	2	39	N	110	0.647	1
+Contig92_chr2_75906683_75907774	773	T	C	85.4	chr2	75907438	C	12	0	2	63	12	0	2	63	17	0	2	78	8	0	2	51	8	0	2	51	13	0	2	66	Y	93	0.166	0
+Contig163_chr2_76402959_76404830	221	C	T	127.0	chr2	76403181	C	4	0	2	42	10	0	2	57	9	0	2	54	11	0	2	60	7	0	2	48	9	0	2	54	Y	54	0.178	1
+Contig177_chr2_79559305_79560033	168	C	T	5.67	chr2	79559476	A	2	0	2	33	3	0	2	36	1	0	2	30	2	0	2	33	0	0	-1	0	1	0	2	30	N	56	0.257	0
+Contig8_chr2_82945728_82945839	61	T	C	223.0	chr2	-1	N	2	0	2	33	4	0	2	39	9	0	2	54	3	0	2	36	5	0	2	42	0	0	-1	0	N	-1	+99.	1
+Contig59_chr2_85243022_85243758	506	G	A	96.3	chr2	85243509	T	9	0	2	54	11	0	2	60	12	0	2	63	14	0	2	69	10	0	2	57	7	0	2	48	Y	6	0.459	0
+Contig56_chr3_17326225_17327548	387	G	C	91.2	chr3	17326591	G	14	0	2	69	13	0	2	66	15	0	2	72	15	0	2	72	13	0	2	66	12	0	2	63	Y	20	0.225	3
+Contig108_chr3_46210055_46210874	367	A	G	21.0	chr3	46210423	A	19	0	2	84	10	0	2	57	16	0	2	75	14	0	2	69	20	0	2	87	11	0	2	60	N	236	0.028	1
+Contig16_chr3_47113407_47114449	322	G	A	105.0	chr3	47113713	G	13	0	2	66	17	0	2	78	15	0	2	72	6	0	2	45	11	0	2	60	11	0	2	60	Y	114	0.132	5
+Contig3_chr3_47564810_47565251	262	T	G	112.0	chr3	47565104	T	14	0	2	69	16	0	2	75	20	0	2	87	10	0	2	57	9	0	2	54	8	0	2	51	Y	24	0.073	1
+Contig35_chr3_49662401_49662929	270	A	T	96.1	chr3	49662652	A	14	0	2	69	11	0	2	60	23	0	2	96	13	0	2	66	12	0	2	63	11	0	2	60	Y	36	3.583	2
+Contig97_chr3_49820354_49821631	1069	G	A	44.1	chr3	49821402	G	9	0	2	54	9	0	2	54	6	0	2	45	10	0	2	57	5	0	2	42	8	0	2	51	N	6	0.201	2
+Contig1_chr3_51588422_51589409	926	A	G	51.0	chr3	51589353	G	2	0	2	33	2	0	2	33	6	0	2	45	4	0	2	39	9	0	2	54	11	0	2	60	N	21	1.147	0
+Contig25_chr3_53260697_53262560	402	G	A	211.0	chr3	53261095	G	17	0	2	78	14	0	2	69	15	0	2	75	12	0	2	63	14	0	2	69	12	0	2	63	Y	116	1.033	0
+Contig11_chr3_53992739_53995954	2392	G	A	82.4	chr3	53995143	A	12	0	2	66	11	0	2	60	14	0	2	69	6	0	2	45	11	0	2	60	17	0	2	78	Y	358	0.321	1
+Contig236_chr3_72676275_72676473	128	G	A	278.0	chr3	72676410	G	12	0	2	63	11	0	2	60	13	0	2	66	10	0	2	57	11	0	2	60	8	0	2	51	N	36	0.496	1
+Contig48_chr3_74792236_74792388	63	T	C	111.0	chr3	74792289	-	17	0	2	78	9	0	2	54	9	0	2	54	5	0	2	42	11	0	2	60	9	0	2	54	N	-1	3.528	0
+Contig65_chr3_80727952_80728283	39	T	C	71.2	chr3	80727990	T	7	0	2	48	3	0	2	36	8	0	2	51	6	0	2	45	8	0	2	51	11	0	2	60	N	22	7.078	0
+Contig53_chr3_86407941_86409349	1406	G	A	86.9	chr3	86409317	A	5	0	2	42	5	0	2	42	4	0	2	39	10	0	2	57	8	0	2	51	12	0	2	63	N	14	3.285	1
+Contig13_chr3_92409738_92412300	718	A	G	23.3	chr3	92410450	A	12	0	2	63	16	0	2	75	18	0	2	81	13	0	2	66	22	0	2	93	7	0	2	48	Y	23	0.224	2
+Contig134_chr4_12145648_12148225	1326	C	T	164.0	chr4	12146961	C	9	0	2	54	8	0	2	51	7	0	2	48	3	0	2	36	5	0	2	42	5	0	2	42	Y	4	0.080	1
+Contig88_chr4_15557471_15557833	268	A	G	145.0	chr4	15557737	A	6	0	2	45	6	0	2	45	11	0	2	60	9	0	2	54	5	0	2	42	6	0	2	45	Y	46	4.138	0
+Contig53_chr4_18823968_18824478	149	A	G	91.3	chr4	18824115	A	18	0	2	81	15	0	2	72	21	0	2	90	13	0	2	66	9	0	2	54	12	0	2	63	N	51	0.251	0
+Contig86_chr4_24953866_24956222	1985	C	T	76.4	chr4	24955841	T	8	0	2	51	1	0	2	30	3	0	2	36	7	0	2	48	2	0	2	33	6	0	2	45	Y	12	0.357	0
+Contig19_chr4_26233601_26233991	146	G	C	51.6	chr4	26233744	G	10	0	2	57	8	0	2	51	9	0	2	54	5	0	2	42	9	0	2	54	4	0	2	39	N	41	0.163	3
+Contig78_chr4_28579975_28580134	30	T	G	19.6	chr4	28579994	-	4	0	2	39	3	0	2	36	5	0	2	42	4	0	2	39	2	0	2	33	2	0	2	33	N	33	0.499	0
+Contig16_chr4_30177226_30179725	621	C	T	88.4	chr4	30177859	C	20	0	2	87	13	0	2	66	13	0	2	66	11	0	2	60	8	0	2	51	8	0	2	51	Y	45	0.797	1
+Contig30_chr4_46196500_46197672	1045	A	C	33.4	chr4	46197522	C	16	0	2	75	9	0	2	54	4	0	2	39	7	0	2	48	14	0	2	69	6	0	2	45	Y	43	0.306	0
+Contig2_chr4_47039007_47039323	158	G	C	35.1	chr4	47039160	-	8	0	2	51	9	0	2	54	13	0	2	66	8	0	2	51	10	0	2	60	9	0	2	54	N	0	0.131	0
+Contig17_chr4_61310346_61311158	267	C	T	49.9	chr4	61310604	T	10	0	2	57	7	0	2	48	9	0	2	54	10	0	2	57	14	0	2	69	7	0	2	48	Y	219	0.098	0
+Contig26_chr4_64190783_64191295	64	A	G	162.0	chr4	64190843	A	10	0	2	57	6	0	2	45	20	0	2	87	12	0	2	63	17	0	2	78	7	0	2	48	Y	306	7.428	0
+Contig11_chr4_65500960_65501654	634	T	C	107.0	chr4	65501585	T	13	0	2	66	14	0	2	69	13	0	2	66	13	0	2	66	6	0	2	45	18	0	2	81	Y	10	6.849	0
+Contig38_chr4_67768488_67768982	113	A	G	102.0	chr4	67768598	A	9	0	2	54	8	0	2	51	9	0	2	54	11	0	2	60	10	0	2	57	7	0	2	48	Y	188	3.175	0
+Contig30_chr4_70978564_70979580	596	A	G	164.0	chr4	70979151	A	15	0	2	72	12	0	2	63	20	0	2	87	14	0	2	69	15	0	2	72	15	0	2	72	Y	111	2.458	2
+Contig72_chr4_74225793_74226492	674	A	G	110.0	chr4	74226472	A	5	0	2	42	3	0	2	36	2	0	2	33	3	0	2	36	7	0	2	48	4	0	2	39	Y	115	+99.	1
+Contig32_chr4_75618955_75620254	301	T	C	333.0	chr4	75619257	C	10	0	2	57	8	0	2	51	12	0	2	63	20	0	2	87	12	0	2	63	14	0	2	69	Y	34	0.163	2
+Contig31_chr5_4734956_4736547	1166	C	T	133.0	chr5	4736132	C	14	0	2	69	8	0	2	51	17	0	2	78	4	0	2	39	9	0	2	54	12	0	2	63	Y	1	0.021	0
+Contig113_chr5_11052263_11052603	28	C	T	38.2	chr5	11052280	C	1	2	1	12	3	2	1	10	5	0	2	42	2	1	2	13	3	0	2	36	8	0	2	51	Y	161	+99.	0
+Contig30_chr5_15698241_15699076	396	G	T	76.6	chr5	15698633	T	8	0	2	51	9	0	2	54	10	0	2	57	7	0	2	48	11	0	2	60	8	0	2	54	Y	65	0.009	0
+Contig36_chr5_17709244_17710004	373	T	C	281.0	chr5	17709624	T	6	0	2	45	9	0	2	54	7	0	2	48	4	0	2	39	10	0	2	57	4	0	2	39	Y	16	0.131	0
+Contig13_chr5_21881138_21881562	227	A	G	251.0	chr5	21881356	A	11	0	2	60	20	0	2	87	22	0	2	93	10	0	2	57	10	0	2	57	21	0	2	90	Y	182	2.013	0
+Contig5_chr5_23188121_23190168	1841	C	T	141.0	chr5	23189975	C	20	0	2	87	19	0	2	84	22	0	2	93	16	0	2	75	18	0	2	81	14	0	2	69	N	45	0.355	0
+Contig6_chr5_26899813_26900498	97	A	C	88.6	chr5	26899910	A	15	0	2	72	14	0	2	69	27	0	2	108	15	0	2	72	13	0	2	69	12	0	2	63	Y	92	7.370	3
+Contig314_chr5_34019166_34019319	72	C	A	20.1	chr5	-1	N	6	0	2	45	9	0	2	54	4	0	2	39	4	0	2	39	9	0	2	54	5	0	2	42	N	-1	+99.	4
+Contig147_chr5_38980258_38980559	221	C	T	40.8	chr5	38980477	C	15	0	2	72	15	0	2	72	19	0	2	84	10	0	2	57	12	0	2	63	20	0	2	87	Y	11	4.576	0
+Contig115_chr5_48119079_48120169	151	C	T	78.3	chr5	48119234	C	17	0	2	78	10	0	2	57	14	0	2	69	16	0	2	75	8	0	2	51	12	0	2	63	Y	205	0.320	0
+Contig45_chr5_50892738_50892968	169	C	A	25.8	chr5	50892911	C	10	0	2	57	7	0	2	48	10	0	2	60	6	0	2	45	6	0	2	45	13	0	2	66	N	244	0.497	1
+Contig40_chr5_51484164_51484696	14	A	G	53.3	chr5	51484180	A	6	0	2	45	4	0	2	39	4	0	2	39	3	0	2	36	0	0	2	13	3	0	2	36	N	63	+99.	1
+Contig40_chr5_51664286_51667573	861	C	T	148.0	chr5	51665149	C	20	0	2	87	21	0	2	90	20	0	2	87	11	0	2	60	16	0	2	75	15	0	2	72	Y	207	0.080	1
+Contig15_chr5_51889708_51891244	882	A	G	149.0	chr5	51890581	G	13	0	2	66	18	0	2	81	17	0	2	78	22	0	2	93	15	0	2	72	22	0	2	93	Y	7	0.025	1
+Contig143_chr5_57231364_57232010	294	T	C	78.5	chr5	57231644	T	3	0	2	36	5	0	2	42	4	0	2	39	2	0	2	33	10	0	2	57	6	0	2	45	Y	73	0.337	2
+Contig13_chr5_57609985_57610584	496	C	T	50.5	chr5	57610476	C	17	0	2	78	9	0	2	54	6	0	2	45	8	0	2	51	10	0	2	57	12	0	2	63	N	77	2.022	1
+Contig230_chr5_58486998_58487280	227	T	C	192.0	chr5	58487232	T	3	0	2	36	4	0	2	39	9	0	2	54	6	0	2	45	4	0	2	39	7	0	2	48	N	24	0.100	2
+Contig385_chr5_60122961_60123128	15	C	G	136.0	chr5	60122976	C	0	0	-1	0	0	0	-1	0	1	0	2	30	1	0	2	30	3	0	2	36	0	0	-1	0	N	100	+99.	2
+Contig143_chr5_65121393_65122035	558	C	A	127.0	chr5	65121959	A	0	0	-1	0	5	0	2	42	3	0	2	36	4	0	2	39	0	0	-1	0	4	0	2	39	Y	285	0.391	1
+Contig32_chr5_70852360_70853289	282	G	A	114.0	chr5	70852623	G	16	0	2	75	11	0	2	60	13	0	2	66	12	0	2	63	13	0	2	66	7	0	2	48	Y	33	0.276	0
+Contig215_chr5_70946445_70947428	363	T	G	28.2	chr5	70946809	C	4	0	2	39	0	5	0	12	9	0	2	54	6	0	2	45	3	3	2	1	9	0	2	54	N	43	0.153	0
+Contig100_chr5_71189678_71190590	813	C	T	30.8	chr5	71190523	C	11	0	2	60	11	0	2	60	9	0	2	54	10	0	2	57	6	0	2	45	13	0	2	66	Y	8	0.362	1
+Contig45_chr5_76133561_76134403	388	A	G	103.0	chr5	76133941	G	3	0	2	36	8	0	2	51	8	0	2	51	5	0	2	42	6	0	2	45	7	0	2	48	Y	57	0.038	0
+Contig61_chr5_90202541_90204393	909	C	T	101.0	chr5	90203461	T	7	0	2	48	5	0	2	42	14	0	2	69	3	0	2	36	5	0	2	42	8	0	2	51	Y	64	1.448	0
+Contig111_chr6_5821219_5822519	1060	A	G	68.1	chr6	5822321	T	7	0	2	48	6	0	2	45	11	0	2	60	9	0	2	54	3	0	2	36	12	0	2	63	Y	7	0.231	1
+Contig220_chr6_10671338_10672441	999	T	C	36.3	chr6	10672322	T	5	0	2	42	11	0	2	60	11	0	2	60	8	0	2	51	5	0	2	42	9	0	2	54	Y	1	1.667	0
+Contig226_chr6_17361986_17362884	418	G	C	251.0	chr6	17362406	G	6	0	2	45	8	0	2	51	7	0	2	48	9	0	2	54	7	0	2	48	7	0	2	48	Y	7	0.147	0
+Contig380_chr6_18173971_18174169	180	C	T	4.87	chr6	18174144	T	0	0	-1	0	4	0	2	39	7	0	2	48	2	0	2	33	2	0	2	33	1	0	2	30	N	56	2.589	0
+Contig51_chr6_20231207_20231785	161	A	G	70.5	chr6	20231375	G	13	0	2	66	5	0	2	42	8	0	2	51	2	0	2	36	5	0	2	42	5	0	2	42	Y	153	1.754	0
+Contig102_chr6_30271329_30271577	39	T	G	139.0	chr6	30271371	G	3	0	2	36	4	0	2	39	6	0	2	45	1	0	2	30	4	0	2	39	4	0	2	39	N	15	1.159	0
+Contig217_chr6_31393824_31394218	97	G	A	115.0	chr6	31393921	G	9	0	2	54	19	0	2	84	15	0	2	72	12	0	2	63	7	0	2	48	10	0	2	57	N	45	0.477	0
+Contig186_chr6_31928098_31928245	73	G	A	117.0	chr6	-1	N	5	0	2	42	8	0	2	51	2	0	2	33	4	0	2	39	1	0	2	30	5	0	2	42	N	-1	0.276	1
+Contig52_chr6_33188498_33188724	123	G	A	59.0	chr6	-1	N	5	0	2	42	13	0	2	66	8	0	2	51	4	0	2	39	9	0	2	54	9	0	2	54	N	-1	0.880	1
+Contig102_chr6_38743009_38743435	290	A	G	178.0	chr6	38743311	A	11	0	2	60	13	0	2	66	9	0	2	54	11	0	2	60	12	0	2	63	13	0	2	66	Y	34	0.148	4
+Contig81_chr6_49018353_49019532	179	C	A	72.5	chr6	49018530	A	15	0	2	72	13	0	2	66	19	0	2	72	8	0	2	51	12	0	2	63	16	0	2	75	Y	15	0.145	1
+Contig112_chr6_51024554_51024851	100	A	G	121.0	chr6	51024654	A	10	0	2	57	12	0	2	63	9	0	2	54	13	0	2	66	14	0	2	69	17	0	2	78	N	75	4.287	0
+Contig40_chr6_51412751_51413807	227	T	C	94.5	chr6	51412975	C	5	0	2	42	8	0	2	51	7	0	2	48	9	0	2	54	11	0	2	60	10	0	2	57	Y	4	5.661	0
+Contig47_chr6_69073222_69074767	1315	T	C	212.0	chr6	69074558	T	20	0	2	87	17	0	2	78	18	0	2	81	12	0	2	63	17	0	2	78	7	0	2	48	Y	9	0.652	0
+Contig30_chr6_74848932_74849059	57	C	G	46.3	chr6	74848993	C	7	0	2	48	7	0	2	33	6	0	2	45	7	0	2	48	5	0	2	42	6	0	2	45	N	-1	+99.	1
+Contig84_chr7_6648683_6650255	1297	G	A	110.0	chr7	6649988	G	18	0	2	81	9	0	2	54	22	0	2	77	16	0	2	75	20	0	2	87	6	0	2	45	Y	83	0.166	0
+Contig239_chr7_13007379_13007700	275	A	G	39.8	chr7	13007642	A	8	0	2	51	5	0	2	42	8	0	2	51	3	0	2	36	3	0	2	36	5	0	2	42	N	46	1.511	3
+Contig119_chr7_18310707_18310948	23	A	T	133.0	chr7	18310729	A	6	0	2	45	5	0	2	42	10	0	2	57	5	0	2	42	2	0	2	33	2	0	2	33	N	4553	+99.	0
+Contig93_chr7_18513377_18513741	173	T	C	130.0	chr7	18513533	C	15	0	2	72	11	0	2	60	18	0	2	81	6	0	2	45	10	0	2	57	14	0	2	69	Y	115	0.174	0
+Contig133_chr7_19603333_19603776	414	C	G	31.9	chr7	19603734	G	10	0	2	57	4	0	2	39	4	0	2	39	5	0	2	42	9	0	2	54	9	0	2	54	N	78	+99.	5
+Contig132_chr7_20426224_20428145	1815	A	G	28.3	chr7	20428041	A	11	1	2	43	12	0	2	63	19	0	2	84	23	0	2	96	14	0	2	69	10	0	2	57	N	11	0.264	0
+Contig206_chr7_26281823_26282074	103	C	A	101.0	chr7	26281925	T	11	0	2	60	16	0	2	61	19	0	2	84	6	0	2	45	19	0	2	84	16	0	2	75	N	-1	0.947	1
+Contig116_chr7_45858984_45859111	38	T	C	73.2	chr7	-1	N	2	0	2	33	1	0	2	30	3	0	2	36	2	0	2	33	2	0	2	33	1	0	2	30	N	-1	3.442	0
+Contig38_chr7_50681997_50682600	42	T	C	92.4	chr7	50682037	G	6	0	2	45	2	0	2	33	10	0	2	57	12	0	2	63	5	0	2	42	6	0	2	45	Y	94	0.146	0
+Contig55_chr7_53147505_53148974	894	A	G	68.4	chr7	53148397	G	22	0	2	93	13	0	2	66	16	0	2	75	8	0	2	51	16	0	2	75	11	0	2	60	Y	19	0.060	0
+Contig4_chr7_53685534_53688206	1709	C	G	76.2	chr7	53687225	C	18	0	2	81	17	0	2	78	18	0	2	81	15	0	2	72	14	0	2	69	14	0	2	69	Y	32	0.659	1
+Contig61_chr7_55832923_55834065	506	T	C	185.0	chr7	55833450	C	9	0	2	54	10	0	2	57	22	0	2	93	12	0	2	63	12	0	2	63	7	0	2	48	Y	1	0.019	0
+Contig91_chr8_12804505_12805470	409	C	A	111.0	chr8	12804906	C	8	0	2	51	10	0	2	57	15	0	2	72	12	0	2	63	14	0	2	69	15	0	2	72	N	145	0.175	0
+Contig30_chr8_17147743_17147923	13	G	A	105.0	chr8	17147756	A	1	3	1	19	1	0	2	30	3	0	2	36	1	0	2	30	1	0	2	30	3	0	2	36	N	6	+99.	0
+Contig8_chr8_27811135_27812620	333	C	T	37.9	chr8	27811458	C	4	0	2	39	11	0	2	60	18	0	2	81	5	0	2	42	6	0	2	45	5	0	2	42	Y	1	0.272	0
+Contig66_chr8_28273102_28273660	175	G	C	81.6	chr8	28273263	T	9	0	2	54	17	0	2	78	19	0	2	84	8	0	2	51	16	0	2	75	19	0	2	84	Y	3	2.735	0
+Contig84_chr8_31375511_31376456	443	T	C	125.0	chr8	31375954	T	10	0	2	57	15	0	2	72	27	0	2	108	18	0	2	81	16	0	2	75	9	0	2	54	Y	2	0.650	0
+Contig18_chr8_32575859_32577431	264	T	C	151.0	chr8	32576124	T	20	0	2	87	14	0	2	69	17	0	2	78	14	0	2	69	13	0	2	66	14	0	2	69	Y	17	0.915	1
+Contig54_chr8_40913908_40916451	1275	G	A	175.0	chr8	40915190	G	10	0	2	57	8	0	2	51	11	0	2	60	7	0	2	48	8	0	2	51	9	0	2	54	Y	21	0.056	3
+Contig93_chr8_44658786_44659075	180	T	G	55.3	chr8	44658964	T	4	0	2	39	3	0	2	36	6	0	2	45	5	0	2	45	5	0	2	42	4	0	2	39	N	14	0.188	0
+Contig17_chr8_57490059_57490498	69	G	T	97.4	chr8	57490127	A	2	0	2	33	11	0	2	60	15	0	2	72	16	0	2	75	8	0	2	51	10	0	2	57	N	40	0.522	5
+Contig66_chr8_58562376_58563446	345	C	G	5.74	chr8	58562721	C	14	0	2	69	12	0	2	63	9	0	2	57	10	0	2	57	9	0	2	54	10	0	2	57	Y	6	0.685	0
+Contig44_chr8_71186368_71188207	1455	G	T	147.0	chr8	71187818	G	4	10	1	74	3	0	2	36	20	0	2	87	12	0	2	63	8	0	2	51	10	0	2	57	Y	88	0.036	0
+Contig73_chr9_29451535_29452248	616	A	G	24.7	chr9	29452127	G	4	0	2	39	7	0	2	48	1	0	2	30	4	0	2	39	7	0	2	48	6	0	2	45	N	49	0.448	4
+Contig96_chr9_39008495_39009278	215	A	C	98.7	chr9	39008708	C	7	0	2	48	13	0	2	66	28	0	2	111	16	0	2	75	17	0	2	78	17	0	2	78	Y	8	0.427	1
+Contig22_chr10_15505382_15505589	172	T	C	38.5	chr10	15505548	T	2	0	2	33	6	0	2	45	8	0	2	51	8	0	2	51	9	0	2	54	12	0	2	63	N	284	2.861	0
+Contig69_chr10_40547265_40548153	371	G	A	58.1	chr10	40547649	A	9	0	2	54	8	0	2	51	8	0	2	51	9	0	2	54	4	0	2	39	5	0	2	42	Y	20	0.138	4
+Contig63_chr10_42716594_42719945	1018	A	G	88.7	chr10	42717616	G	13	0	2	66	14	0	2	69	13	0	2	66	12	0	2	63	18	0	2	81	5	0	2	42	Y	25	1.740	0
+Contig22_chr10_43255307_43255570	81	C	A	37.2	chr10	43255383	C	15	0	2	72	18	0	2	81	22	0	2	93	16	0	2	75	11	0	2	60	12	0	2	63	N	62	0.450	0
+Contig9_chr10_51475063_51476054	770	C	T	57.3	chr10	51475839	C	6	0	2	45	16	0	2	75	16	0	2	75	13	0	2	66	9	0	2	54	9	2	2	21	N	80	0.394	0
+Contig42_chr10_53816543_53818392	1642	G	A	27.5	chr10	53818172	A	7	0	2	48	13	0	2	66	17	0	2	78	14	0	2	69	19	0	2	84	16	0	2	75	N	1	0.433	0
+Contig36_chr10_53992615_53993741	229	G	C	86.2	chr10	53992846	G	17	0	2	78	14	0	2	69	13	0	2	66	15	0	2	72	12	0	2	63	15	0	2	72	N	23	1.912	0
+Contig20_chr10_58141129_58141750	575	C	T	46.1	chr10	58141701	C	7	0	2	48	8	0	2	51	9	0	2	54	3	0	2	36	4	0	2	39	9	0	2	54	N	1	4.264	0
+Contig26_chr10_59510973_59511899	146	C	A	29.0	chr10	59511126	C	8	0	2	51	13	0	2	66	18	0	2	81	13	0	2	66	10	0	2	57	7	0	2	48	Y	208	1.077	0
+Contig72_chr11_7142765_7143772	146	G	A	152.0	chr11	7142911	A	8	0	2	51	8	0	2	51	24	0	2	99	10	0	2	57	17	0	2	78	11	0	2	60	Y	90	1.137	0
+Contig103_chr11_8844784_8845095	214	T	G	135.0	chr11	8844993	T	1	1	2	12	10	0	2	57	5	4	1	26	2	3	1	13	2	7	1	34	1	1	2	13	Y	75	0.731	0
+Contig9_chr11_9904571_9905983	1284	C	T	151.0	chr11	9905857	C	16	0	2	75	19	0	2	84	17	0	2	78	16	0	2	75	12	0	2	63	13	1	2	44	Y	11	0.422	1
+Contig35_chr11_22459883_22460855	714	T	G	54.9	chr11	22460577	T	3	0	2	36	1	0	2	30	3	0	2	36	2	0	2	33	2	0	2	33	0	0	-1	0	N	24	0.382	0
+Contig7_chr11_40017076_40017630	352	C	T	46.3	chr11	40017422	C	7	0	2	48	9	0	2	54	6	0	2	45	8	0	2	51	16	0	2	75	9	0	2	54	Y	44	0.336	0
+Contig108_chr11_42953408_42955156	367	A	G	89.4	chr11	42953779	A	17	0	2	78	11	0	2	60	14	0	2	69	20	0	2	87	14	0	2	69	17	0	2	78	Y	118	0.784	1
+Contig82_chr11_43490732_43490862	60	C	T	47.3	chr11	-1	N	0	0	-1	0	0	0	-1	0	1	0	2	30	3	0	2	36	1	1	2	19	1	0	2	30	N	-1	6.763	0
+Contig16_chr11_53408448_53408790	187	A	G	153.0	chr11	53408638	A	7	0	2	48	9	0	2	54	18	0	2	81	10	0	2	57	11	0	2	60	12	0	2	63	Y	116	1.367	0
+Contig21_chr12_18403415_18404381	586	G	T	34.5	chr12	18403983	-	13	0	2	66	16	0	2	75	25	0	2	102	12	0	2	63	12	0	2	63	14	0	2	69	Y	12	0.068	0
+Contig33_chr12_19804073_19804529	178	T	C	69.4	chr12	19804261	T	13	0	2	66	13	0	2	66	22	0	2	93	11	0	2	60	12	0	2	63	18	0	2	81	Y	11	1.571	0
+Contig41_chr12_25565452_25566993	475	G	T	6.29	chr12	25565926	G	15	0	2	72	14	0	2	69	10	0	2	57	15	0	2	72	18	0	2	81	19	0	2	84	N	10	2.231	1
+Contig9_chr12_27204351_27204696	239	A	G	145.0	chr12	27204587	A	7	0	2	48	8	0	2	51	12	0	2	63	8	0	2	51	11	0	2	60	11	0	2	60	Y	14	0.046	0
+Contig45_chr12_30548282_30550498	448	C	T	124.0	chr12	30548703	-	9	0	2	54	11	0	2	60	22	0	2	93	19	0	2	84	12	0	2	63	12	0	2	63	Y	66	0.305	0
+Contig46_chr12_35571846_35572563	58	G	C	83.2	chr12	35571906	G	4	0	2	39	10	0	2	57	11	0	2	60	6	0	2	45	10	0	2	57	6	0	2	45	Y	55	+99.	1
+Contig28_chr12_42075871_42076044	136	G	A	134.0	chr12	42076006	A	6	0	2	45	5	0	2	42	7	0	2	48	7	0	2	48	2	0	2	33	4	0	2	39	N	3	9.479	0
+Contig16_chr12_42386141_42387454	194	A	G	161.0	chr12	42386323	A	11	0	2	60	8	0	2	54	23	0	2	96	17	0	2	78	6	0	2	45	13	0	2	66	Y	7	0.927	1
+Contig42_chr12_44424628_44425829	255	A	G	84.4	chr12	44424879	A	12	0	2	63	19	0	2	84	23	0	2	96	15	0	2	72	18	0	2	81	14	0	2	69	Y	18	1.190	2
+Contig10_chr12_44447953_44449698	63	C	T	105.0	chr12	44448020	C	11	0	2	60	9	0	2	54	12	0	2	63	10	0	2	57	15	0	2	72	8	0	2	51	Y	31	11.791	0
+Contig5_chr12_53880670_53882675	1221	A	C	99.4	chr12	53881888	A	16	0	2	75	18	0	2	81	23	0	2	96	10	0	2	57	15	0	2	72	17	0	2	78	Y	31	0.061	0
+Contig86_chr12_56715356_56716464	818	T	C	166.0	chr12	56716164	T	20	0	2	87	16	0	2	75	16	0	2	75	14	0	2	69	13	0	2	66	7	0	2	48	Y	22	1.092	0
+Contig3_chr12_65021967_65024097	238	T	G	92.6	chr12	65022205	T	17	0	2	78	14	0	2	69	16	0	2	75	9	0	2	54	13	0	2	66	15	0	2	72	Y	258	0.117	0
+Contig43_chr12_66499742_66500010	121	G	T	41.5	chr12	66499866	G	12	0	2	63	4	0	2	39	8	0	2	51	6	0	2	45	10	0	2	57	6	0	2	45	N	42	0.421	0
+Contig14_chr12_71364692_71365311	20	A	C	103.0	chr12	71364712	A	7	0	2	48	3	0	2	36	5	0	2	42	1	0	2	30	2	0	2	33	3	0	2	36	Y	35	+99.	0
+Contig37_chr13_15910164_15910426	245	G	A	32.9	chr13	-1	N	3	4	1	41	4	0	2	39	3	0	2	36	4	0	2	39	3	0	2	36	10	0	2	57	N	-1	2.159	1
+Contig107_chr13_26045881_26046290	341	C	G	81.4	chr13	26046230	C	16	0	2	75	20	0	2	90	14	0	2	69	15	0	2	72	9	0	2	54	9	0	2	54	Y	51	4.510	0
+Contig251_chr13_28498333_28501066	864	T	G	296.0	chr13	28499180	T	3	0	2	36	5	0	2	42	4	0	2	39	2	0	2	33	5	0	2	42	6	0	2	45	Y	9	0.068	0
+Contig154_chr13_36777857_36778736	356	G	A	95.5	chr13	36778225	A	6	0	2	45	11	0	2	60	11	0	2	60	9	0	2	54	13	0	2	66	8	0	2	51	Y	59	0.192	0
+Contig37_chr13_42529793_42530857	150	G	T	192.0	chr13	42529926	G	18	0	2	81	14	0	2	69	16	0	2	75	14	0	2	69	8	0	2	51	11	0	2	60	N	22	0.795	5
+Contig47_chr13_47045833_47046626	257	A	C	28.5	chr13	47046097	A	13	0	2	66	10	0	2	57	17	0	2	78	20	0	2	87	15	0	2	72	9	0	2	57	N	129	0.468	0
+Contig42_chr13_47730018_47730856	254	A	G	75.1	chr13	47730294	A	13	0	2	66	6	0	2	45	12	0	2	63	9	0	2	54	16	0	2	75	11	0	2	63	Y	630	0.049	1
+Contig55_chr13_53467708_53468101	221	T	G	132.0	chr13	53467925	T	25	0	2	102	12	0	2	63	26	0	2	105	7	0	2	48	16	0	2	75	16	0	2	75	N	20	5.717	1
+Contig49_chr13_55103679_55105532	503	G	A	76.0	chr13	55104178	G	21	0	2	90	19	0	2	84	18	0	2	81	20	0	2	87	8	9	1	89	17	0	2	78	Y	20	0.259	1
+Contig42_chr13_64785759_64786045	14	C	G	22.8	chr13	64785772	C	2	0	2	33	2	0	2	33	4	0	2	39	7	0	2	48	8	0	2	51	2	0	2	33	N	527	+99.	1
+Contig66_chr13_66021813_66022244	319	C	T	125.0	chr13	66022136	C	11	0	2	60	16	0	2	75	15	0	2	75	12	0	2	63	17	0	2	78	8	0	2	51	N	14	0.055	3
+Contig48_chr14_11839435_11843272	3014	A	G	163.0	chr14	11842446	A	10	0	2	57	8	0	2	51	13	0	2	66	10	0	2	57	5	0	2	42	10	0	2	57	Y	31	0.908	0
+Contig9_chr14_23353717_23354432	80	G	A	61.3	chr14	23353797	G	3	0	2	36	6	0	2	45	11	0	2	60	8	0	2	51	4	0	2	39	2	4	1	35	Y	11	0.444	0
+Contig14_chr14_24131180_24133488	1633	G	A	131.0	chr14	24132818	G	21	0	2	90	16	0	2	75	12	0	2	63	10	0	2	57	11	0	2	60	20	0	2	87	Y	36	0.347	0
+Contig28_chr14_26905747_26909514	975	G	C	3.13	chr14	26906723	G	16	0	2	75	10	0	2	57	12	0	2	63	15	0	2	72	10	0	2	57	7	0	2	48	N	287	0.117	2
+Contig14_chr14_29616948_29618316	109	G	A	80.3	chr14	29617053	-	17	0	2	78	16	0	2	75	16	0	2	75	10	0	2	57	17	0	2	78	19	0	2	84	Y	32	1.051	0
+Contig24_chr14_29728478_29728839	242	T	A	107.0	chr14	29728724	T	2	0	2	33	12	0	2	63	10	0	2	57	12	0	2	63	5	0	2	42	9	0	2	54	N	70	2.712	0
+Contig76_chr14_30028102_30029179	1046	C	T	38.5	chr14	30029169	T	3	0	2	36	6	0	2	45	9	0	2	54	7	0	2	48	9	0	2	54	8	0	2	51	Y	96	+99.	0
+Contig115_chr14_31417207_31417574	259	A	G	12.1	chr14	31417454	G	13	0	2	66	15	0	2	72	21	0	2	90	12	0	2	63	13	0	2	66	9	0	2	54	N	28	5.379	2
+Contig70_chr14_46653662_46653790	111	G	A	46.7	chr14	46653768	G	7	0	2	48	5	0	2	42	11	0	2	60	11	0	2	60	8	0	2	51	10	0	2	57	N	21	+99.	2
+Contig43_chr14_49991855_49993511	918	A	G	112.0	chr14	49992767	G	15	0	2	72	10	0	2	57	11	0	2	63	9	0	2	54	12	0	2	63	9	0	2	54	Y	6	0.314	1
+Contig64_chr14_56768376_56768902	473	C	T	29.0	chr14	56768832	C	15	0	2	72	11	0	2	60	14	0	2	69	14	0	2	69	7	0	2	48	9	0	2	54	Y	91	8.281	0
+Contig60_chr15_18493036_18494316	150	G	A	92.6	chr15	18493188	G	9	0	2	54	13	0	2	66	9	0	2	54	6	0	2	45	5	0	2	42	12	0	2	63	Y	45	0.125	0
+Contig213_chr15_19567788_19568626	196	A	C	13.9	chr15	19567992	A	4	0	2	39	2	0	2	33	7	0	2	48	4	0	2	39	4	0	2	39	6	0	2	45	Y	111	0.043	0
+Contig59_chr15_22138344_22138535	120	G	C	142.0	chr15	22138470	C	11	0	2	60	10	0	2	57	18	0	2	81	4	0	2	39	10	0	2	57	15	0	2	72	N	8	2.553	0
+Contig112_chr15_26772864_26773267	374	C	T	21.6	chr15	26773244	C	4	0	2	39	4	0	2	39	5	0	2	42	2	0	2	33	4	0	2	39	3	0	2	36	N	18	+99.	0
+Contig24_chr15_26894765_26895003	155	G	A	87.6	chr15	-1	N	6	0	2	45	5	0	2	42	7	0	2	48	4	0	2	39	4	0	2	39	2	0	2	33	N	-1	0.178	0
+Contig2_chr15_33944796_33947182	1860	G	A	99.5	chr15	33946654	G	10	0	2	57	11	0	2	60	16	0	2	75	14	0	2	69	14	0	2	69	16	0	2	75	Y	16	0.252	0
+Contig73_chr15_34690052_34691332	714	T	C	130.0	chr15	34690769	T	7	0	2	48	7	0	2	48	17	0	2	78	9	0	2	54	9	0	2	54	4	0	2	39	Y	7	6.003	0
+Contig68_chr15_37747190_37747426	126	G	A	130.0	chr15	37747331	G	14	0	2	69	14	0	2	69	11	0	2	63	19	0	2	84	13	0	2	66	21	0	2	90	N	229	0.255	0
+Contig35_chr15_41400484_41400672	160	A	C	143.0	chr15	-1	N	1	0	2	30	2	0	2	33	0	0	-1	0	2	0	2	33	3	0	2	36	2	0	2	33	N	-1	+99.	0
+Contig104_chr15_45106954_45107158	70	A	T	64.4	chr15	45107015	A	6	0	2	45	6	0	2	45	19	0	2	84	7	0	2	48	7	0	2	48	3	0	2	36	N	202	4.319	0
+Contig119_chr16_6160274_6160477	180	G	A	54.8	chr16	6160457	G	7	0	2	48	6	0	2	45	12	0	2	63	3	0	2	36	11	0	2	60	10	0	2	57	N	42	+99.	0
+Contig126_chr16_10611887_10612152	150	G	T	145.0	chr16	10612037	G	14	0	2	69	9	0	2	54	11	0	2	63	8	0	2	51	8	0	2	51	11	0	2	60	N	15	0.104	6
+Contig114_chr16_12565220_12565676	10	G	A	134.0	chr16	12565230	G	0	0	-1	0	2	0	2	33	2	0	2	33	0	0	-1	0	1	0	2	30	1	0	2	30	N	333	+99.	0
+Contig43_chr16_20200090_20200514	70	A	G	58.6	chr16	20200154	A	11	0	2	60	15	0	2	72	15	0	2	72	6	0	2	45	9	0	2	54	12	0	2	63	Y	2	0.466	1
+Contig60_chr16_28079136_28080263	588	T	G	157.0	chr16	28079739	T	22	0	2	93	20	0	2	87	22	0	2	93	17	0	2	78	12	0	2	63	10	0	2	57	Y	105	5.999	1
+Contig70_chr16_33758668_33759655	104	A	T	58.1	chr16	33758772	A	6	0	2	45	7	0	2	48	17	0	2	78	14	0	2	69	8	0	2	51	10	0	2	57	N	54	0.162	0
+Contig66_chr16_37935682_37935831	116	T	C	99.2	chr16	37935802	C	12	0	2	63	6	0	2	45	19	0	2	84	12	0	2	63	13	0	2	66	17	0	2	78	N	266	+99.	2
+Contig16_chr16_40451506_40451643	84	A	G	59.8	chr16	40451592	A	7	0	2	48	5	0	2	42	7	0	2	48	13	0	2	66	14	0	2	69	19	0	2	84	N	45	5.061	0
+Contig53_chr16_49888293_49888587	260	G	A	108.0	chr16	49888550	A	4	0	2	39	1	0	2	30	3	0	2	36	5	0	2	42	2	0	2	33	2	0	2	33	Y	9	0.261	1
+Contig31_chr17_12128267_12129637	205	G	A	90.5	chr17	12128484	G	7	0	2	48	6	0	2	45	6	0	2	45	11	0	2	60	7	0	2	48	4	0	2	39	Y	10	0.246	0
+Contig50_chr17_12247973_12249183	889	G	T	47.6	chr17	12248878	G	0	1	2	9	8	0	2	51	9	2	2	21	7	2	2	21	15	0	2	72	0	3	0	9	Y	1	1.181	0
+Contig1_chr17_12979232_12980380	808	G	T	12.3	chr17	12980028	G	18	0	2	81	12	0	2	63	21	0	2	90	13	0	2	66	22	0	2	93	18	0	2	81	Y	9	0.336	1
+Contig63_chr17_14186372_14186928	54	C	T	70.7	chr17	14186427	C	6	0	2	45	2	0	2	33	5	0	2	42	6	0	2	45	3	0	2	36	3	0	2	36	Y	11	0.560	3
+Contig42_chr17_23434859_23438330	2100	C	T	39.5	chr17	23436985	T	4	0	2	39	7	0	2	48	7	0	2	48	3	0	2	36	6	0	2	45	2	0	2	33	Y	25	0.344	0
+Contig63_chr17_23796320_23796814	220	A	G	54.0	chr17	23796536	G	6	0	2	45	4	0	2	39	5	0	2	42	6	0	2	45	4	0	2	39	6	0	2	45	Y	139	0.067	1
+Contig76_chr17_24107434_24107834	316	T	C	141.0	chr17	24107726	T	19	0	2	84	15	0	2	72	20	0	2	87	16	0	2	75	11	0	2	60	18	0	2	81	Y	30	0.175	2
+Contig99_chr17_26021506_26022200	505	C	T	88.8	chr17	26022017	T	15	0	2	72	13	0	2	66	19	0	2	84	9	0	2	54	10	0	2	57	11	0	2	60	Y	1	0.172	1
+Contig59_chr17_26790302_26795045	287	C	T	45.1	chr17	26790582	C	8	0	2	51	6	0	2	45	13	0	2	66	6	0	2	45	15	0	2	72	12	0	2	63	Y	75	0.019	1
+Contig99_chr17_27018324_27019378	446	G	A	31.1	chr17	27018776	G	14	0	2	69	12	0	2	63	14	0	2	69	10	0	2	57	9	0	2	54	11	0	2	60	Y	13	0.290	4
+Contig125_chr17_27739115_27739410	63	G	A	107.0	chr17	27739177	G	8	0	2	51	11	0	2	60	16	0	2	75	8	0	2	51	4	0	2	39	15	0	2	72	N	100	0.819	0
+Contig115_chr17_37489899_37490101	159	G	A	62.4	chr17	37490067	G	4	0	2	39	3	0	2	36	4	0	2	39	4	0	2	39	3	0	2	36	6	0	2	45	N	4	1.411	1
+Contig180_chr17_45154356_45154925	524	A	G	146.0	chr17	45154886	G	7	0	2	48	9	0	2	54	7	0	2	48	9	0	2	54	4	0	2	39	8	0	2	51	Y	11	+99.	2
+Contig61_chr17_48221795_48223545	1404	T	A	177.0	chr17	48223216	T	15	0	2	72	14	0	2	69	24	0	2	99	17	0	2	78	18	0	2	81	24	0	2	99	Y	161	0.633	2
+Contig27_chr17_61713766_61716585	1056	G	C	40.0	chr17	61714821	G	4	0	2	39	8	0	2	51	10	0	2	57	6	0	2	45	6	0	2	45	3	0	2	36	N	6	2.200	4
+Contig229_chr18_3706523_3708577	1076	A	G	83.9	chr18	3707630	A	11	0	2	60	13	0	2	66	26	0	2	105	11	0	2	60	15	0	2	72	17	0	2	78	Y	63	0.445	0
+Contig24_chr18_14049894_14050480	24	A	G	123.0	chr18	14049918	A	5	0	2	42	5	0	2	42	4	0	2	39	6	0	2	45	7	0	2	48	5	0	2	42	Y	17	+99.	0
+Contig30_chr18_18771753_18772121	39	C	G	48.5	chr18	18771787	C	2	0	2	33	5	0	2	42	2	0	2	33	6	0	2	45	3	0	2	36	2	0	2	33	N	5	0.135	0
+Contig123_chr18_19916160_19916379	116	G	A	79.2	chr18	19916272	A	14	0	2	69	12	0	2	63	14	0	2	69	6	0	2	45	11	0	2	60	10	0	2	57	N	26	0.172	0
+Contig82_chr18_27305489_27306229	566	C	T	49.5	chr18	27306051	A	6	0	2	45	6	0	2	45	10	0	2	57	11	0	2	60	6	0	2	45	7	0	2	48	N	1	0.349	0
+Contig71_chr18_34324706_34326687	136	G	A	151.0	chr18	34324841	G	9	0	2	54	9	0	2	54	17	0	2	78	8	0	2	51	11	0	2	60	10	0	2	57	Y	2	2.129	2
+Contig16_chr18_34672093_34673044	538	T	C	58.2	chr18	34672635	T	8	0	2	51	15	0	2	72	16	0	2	75	15	0	2	72	9	0	2	57	18	0	2	81	Y	8	0.214	1
+Contig96_chr18_38492535_38493333	624	G	A	119.0	chr18	38493162	T	17	0	2	78	12	0	2	63	13	0	2	66	16	0	2	75	8	0	2	51	15	0	2	72	Y	127	0.131	0
+Contig226_chr18_47753756_47754666	427	T	C	21.1	chr18	47754215	T	10	0	2	57	4	0	2	39	8	0	2	51	5	0	2	42	6	0	2	45	7	0	2	48	Y	42	0.522	0
+Contig170_chr18_49411558_49412230	94	C	A	74.3	chr18	49411655	C	14	0	2	69	10	0	2	57	9	0	2	54	10	0	2	57	3	0	2	36	3	0	2	36	N	9	1.457	0
+Contig192_chr18_49419342_49420737	1058	C	T	42.8	chr18	49420381	A	3	0	2	36	4	0	2	39	5	0	2	42	8	0	2	51	3	0	2	36	3	0	2	36	Y	34	2.107	2
+Contig64_chr18_55979770_55980315	49	G	A	89.1	chr18	55979824	G	3	0	2	36	9	0	2	54	7	0	2	51	4	0	2	39	3	0	2	36	3	0	2	36	Y	-1	2.124	0
+Contig20_chr18_58130301_58130735	112	A	G	74.4	chr18	58130413	A	12	0	2	66	11	0	2	60	11	0	2	60	12	0	2	63	6	0	2	45	6	0	2	45	Y	10	0.290	0
+Contig146_chr19_5221790_5223013	143	A	G	114.0	chr19	5221916	-	1	0	2	30	4	0	2	39	3	0	2	36	5	0	2	42	2	0	2	33	5	0	2	42	Y	12	0.870	0
+Contig13_chr19_7739961_7740118	26	C	G	220.0	chr19	-1	N	3	0	2	36	1	0	2	30	2	0	2	33	3	0	2	36	1	0	2	30	2	0	2	33	N	-1	+99.	0
+Contig67_chr19_12398520_12399367	499	C	T	161.0	chr19	12399017	C	10	0	2	57	11	0	2	60	20	0	2	87	14	0	2	69	24	0	2	99	8	0	2	51	Y	137	5.634	0
+Contig66_chr19_16285672_16287223	996	C	T	190.0	chr19	16286674	C	9	0	2	57	14	0	2	69	16	0	2	78	17	0	2	78	8	0	2	51	22	0	2	93	Y	40	0.110	0
+Contig129_chr19_25541958_25542221	202	T	C	68.1	chr19	25542154	C	11	0	2	60	19	0	2	84	10	0	2	60	17	0	2	78	9	0	2	54	12	0	2	63	N	-1	2.551	1
+Contig152_chr19_34274440_34275622	1072	C	T	48.0	chr19	34275509	T	1	0	2	30	2	0	2	33	1	0	2	30	1	0	2	30	4	0	2	39	5	0	2	42	N	71	0.309	0
+Contig29_chr19_37339947_37341911	1692	C	T	211.0	chr19	37341631	C	15	0	2	72	20	0	2	87	11	0	2	60	15	0	2	72	3	0	2	36	12	0	2	63	Y	7	0.096	0
+Contig39_chr19_47709708_47711327	444	C	T	36.8	chr19	47710148	T	10	0	2	57	4	0	2	39	8	0	2	51	9	0	2	54	6	0	2	45	6	0	2	45	Y	95	1.251	1
+Contig60_chr19_54013816_54014398	281	A	G	138.0	chr19	54014103	C	6	0	2	45	15	0	2	72	7	0	2	48	10	0	2	57	15	0	2	72	10	0	2	57	Y	188	1.271	0
+Contig251_chr19_56559098_56559626	452	T	C	3.36	chr19	56559549	T	12	0	2	63	13	0	2	66	21	0	2	90	15	0	2	72	14	0	2	69	11	0	2	60	N	1	0.117	0
+Contig50_chr20_12138509_12141975	3206	C	A	248.0	chr20	12141763	C	8	0	2	51	15	0	2	72	14	0	2	69	6	0	2	45	10	0	2	57	7	0	2	48	Y	2	0.384	0
+Contig36_chr20_32631363_32632049	176	G	A	24.1	chr20	32631526	G	7	0	2	48	14	0	2	69	19	0	2	84	14	0	2	69	15	0	2	72	16	0	2	75	N	50	1.150	0
+Contig39_chr20_36316398_36316498	57	C	T	30.3	chr20	36316455	C	2	0	2	33	0	1	2	8	0	0	-1	0	0	1	2	10	0	0	-1	0	0	0	-1	0	N	-483	+99.	0
+Contig32_chr20_36468058_36468869	66	C	T	40.4	chr20	36468127	C	6	0	2	45	3	0	2	36	4	0	2	39	5	0	2	42	3	0	2	36	4	0	2	39	N	59	0.281	0
+Contig24_chr20_38203888_38204900	834	C	T	132.0	chr20	38204731	C	9	0	2	54	17	0	2	78	20	0	2	87	8	0	2	51	11	0	2	60	17	0	2	78	Y	14	0.397	0
+Contig79_chr20_44263127_44264103	456	G	T	31.5	chr20	44263573	G	22	0	2	93	16	0	2	75	15	0	2	72	19	0	2	84	13	0	2	66	26	0	2	105	Y	8	3.250	0
+Contig26_chr20_45878482_45878787	197	A	G	160.0	chr20	45878672	A	17	0	2	78	15	0	2	72	11	0	2	63	17	0	2	78	12	0	2	63	10	0	2	57	N	14	0.535	0
+Contig119_chr20_46550670_46551383	609	G	A	139.0	chr20	46551277	G	7	0	2	48	17	0	2	78	19	0	2	84	20	0	2	87	9	0	2	54	15	0	2	72	Y	7	0.488	1
+Contig50_chr21_4178523_4178687	121	G	A	362.0	chr21	4178640	G	8	0	2	51	14	0	2	69	5	0	2	42	3	0	2	36	11	0	2	60	4	0	2	39	N	392	0.483	0
+Contig103_chr21_10177255_10177765	121	G	A	125.0	chr21	10177367	G	12	0	2	63	10	0	2	57	10	0	2	57	17	0	2	78	14	0	2	69	7	0	2	51	Y	37	0.213	3
+Contig1_chr21_10805534_10806399	766	A	G	146.0	chr21	10806301	G	10	0	2	57	6	0	2	45	9	0	2	54	6	0	2	45	7	0	2	48	5	0	2	42	Y	20	0.319	0
+Contig46_chr21_21029492_21030645	443	C	T	5.37	chr21	21029910	C	15	0	2	72	11	0	2	60	16	0	2	75	15	0	2	72	13	0	2	66	6	0	2	45	Y	96	3.737	0
+Contig129_chr21_31045749_31046924	381	A	G	129.0	chr21	31046141	A	19	0	2	84	8	0	2	51	23	0	2	96	12	0	2	63	15	0	2	72	18	0	2	81	Y	69	0.028	2
+Contig23_chr21_31651123_31651986	840	C	T	71.3	chr21	31651957	T	6	0	2	45	9	0	2	54	8	0	2	51	10	0	2	57	4	0	2	39	7	0	2	48	Y	105	2.977	3
+Contig64_chr21_43341847_43342031	84	T	C	114.0	chr21	43341926	T	11	0	2	60	9	0	2	54	10	0	2	57	6	0	2	45	6	0	2	45	7	0	2	48	N	10	3.954	2
+Contig60_chr21_43475347_43475824	175	C	T	8.05	chr21	43475551	T	6	0	2	45	7	0	2	48	13	0	2	66	6	0	2	45	14	0	2	69	14	0	2	69	N	45	0.058	0
+Contig64_chr21_45377513_45377872	19	C	T	60.7	chr21	-1	N	3	0	2	36	2	0	2	33	1	0	2	30	0	0	-1	0	3	0	2	36	1	0	2	30	N	-1	+99.	1
+Contig159_chr22_7896450_7896974	109	G	C	151.0	chr22	7896570	G	16	0	2	75	5	7	1	62	14	0	2	69	16	0	2	75	13	0	2	66	13	0	2	66	Y	16	0.465	0
+Contig46_chr22_9416920_9417467	381	G	A	145.0	chr22	9417259	G	10	0	2	57	9	0	2	54	10	0	2	57	6	0	2	45	13	0	2	66	7	0	2	48	Y	154	0.242	0
+Contig86_chr22_9440787_9441725	713	T	G	119.0	chr22	9441488	G	6	0	2	45	12	0	2	63	10	0	2	57	11	0	2	60	13	0	2	66	16	0	2	75	Y	132	0.218	0
+Contig16_chr22_15636960_15637372	236	A	C	9.79	chr22	15637192	T	4	0	2	39	5	0	2	42	12	0	2	63	7	0	2	48	6	0	2	45	11	0	2	60	Y	5	2.163	0
+Contig4_chr22_16114310_16114546	128	G	C	101.0	chr22	16114432	G	10	0	2	57	13	0	2	66	20	0	2	87	20	0	2	87	16	0	2	75	9	0	2	54	N	19	0.526	0
+Contig23_chr22_34612023_34612568	167	C	G	92.3	chr22	34612181	C	11	0	2	60	18	0	2	81	13	0	2	66	8	0	2	51	12	0	2	63	14	0	2	69	Y	7	0.409	0
+Contig4_chr22_38252245_38253712	799	A	C	159.0	chr22	38253064	A	18	0	2	81	15	0	2	72	15	0	2	72	20	0	2	87	27	0	2	108	15	0	2	72	Y	90	4.330	0
+Contig122_chr22_48412466_48414788	1888	C	T	125.0	chr22	48414355	T	16	0	2	75	15	0	2	72	16	0	2	75	14	0	2	72	12	0	2	63	7	0	2	48	N	42	0.122	0
+Contig77_chr22_49764414_49764875	353	C	A	148.0	chr22	49764777	C	7	4	1	65	18	0	2	81	16	0	2	75	20	0	2	87	4	3	1	52	9	4	1	67	Y	12	0.941	0
+Contig26_chr22_57817664_57819633	1453	A	G	150.0	chr22	57819121	G	9	0	2	54	9	0	2	54	13	0	2	66	15	0	2	72	11	0	2	60	14	0	2	69	N	15	0.471	1
+Contig348_chr22_62406104_62406495	189	C	A	134.0	chr22	62406302	A	9	0	2	54	14	0	2	69	11	0	2	60	10	0	2	57	12	0	2	63	6	0	2	45	Y	5	0.912	0
+Contig133_chr23_3525134_3526502	1223	A	G	201.0	chr23	3526387	A	11	0	2	60	13	0	2	66	23	0	2	96	21	0	2	90	13	0	2	66	10	0	2	57	Y	61	1.359	0
+Contig111_chr23_7058063_7058181	107	G	A	108.0	chr23	7058162	A	8	0	2	51	8	0	2	51	7	0	2	48	2	0	2	33	5	0	2	42	6	0	2	45	N	3	+99.	0
+Contig79_chr23_7844129_7844837	110	C	A	141.0	chr23	7844237	T	13	0	2	66	15	0	2	72	17	0	2	78	12	0	2	63	15	0	2	72	16	0	2	75	Y	40	0.339	0
+Contig38_chr23_9201002_9201725	597	C	T	155.0	chr23	9201609	T	17	0	2	78	8	0	2	51	13	0	2	66	5	0	2	42	11	0	2	60	7	0	2	48	Y	167	0.633	1
+Contig33_chr23_20672540_20674320	347	T	A	91.4	chr23	20672885	A	11	0	2	60	14	0	2	69	15	0	2	72	7	0	2	48	12	0	2	63	18	0	2	81	Y	31	0.452	1
+Contig35_chr23_28447813_28449115	70	T	A	21.3	chr23	28447881	T	9	0	2	54	8	0	2	51	10	0	2	57	9	0	2	54	10	0	2	57	12	0	2	63	N	251	0.163	1
+Contig51_chr23_30590939_30591162	140	C	T	142.0	chr23	30591080	C	14	0	2	69	4	0	2	39	10	0	2	57	12	0	2	63	14	0	2	69	4	0	2	39	N	13	1.658	0
+Contig57_chr23_32216351_32216721	179	T	G	143.0	chr23	32216534	T	15	0	2	72	15	0	2	72	23	0	2	96	13	0	2	66	16	0	2	75	15	0	2	72	N	32	1.387	1
+Contig93_chr23_35744841_35745791	40	A	T	30.4	chr23	35744880	T	6	0	2	45	7	0	2	48	7	0	2	48	2	0	2	33	5	0	2	42	5	0	2	42	Y	50	2.173	0
+Contig99_chr23_42543966_42544147	14	G	A	357.0	chr23	42543980	G	4	0	2	39	2	0	2	33	3	0	2	36	3	0	2	36	1	0	2	30	2	0	2	33	N	69	+99.	0
+Contig32_chr23_48285289_48286638	186	T	C	176.0	chr23	48285470	T	18	0	2	81	12	0	2	63	16	0	2	75	13	0	2	66	9	0	2	54	9	0	2	54	Y	4	4.238	1
+Contig50_chr24_22515247_22516072	761	C	T	243.0	chr24	22515981	T	11	0	2	60	10	0	2	57	8	0	2	51	9	0	2	54	18	0	2	81	8	0	2	51	Y	1	0.190	0
+Contig92_chr24_28935897_28936321	13	G	A	47.1	chr24	-1	N	2	0	2	33	1	0	2	30	0	0	-1	0	0	0	-1	0	1	0	2	30	0	0	-1	0	Y	-1	+99.	2
+Contig84_chr24_29196623_29199644	466	C	T	126.0	chr24	29197091	T	7	0	2	48	11	0	2	60	8	0	2	51	7	0	2	48	11	0	2	60	15	0	2	72	Y	42	0.215	0
+Contig35_chr24_30150986_30151507	492	A	C	114.0	chr24	30151448	A	5	0	2	42	2	0	2	33	2	0	2	33	3	0	2	36	3	0	2	36	5	0	2	42	N	41	2.587	6
+Contig61_chr24_30465488_30465834	149	G	T	68.2	chr24	30465637	G	13	0	2	66	4	2	2	11	18	0	2	81	11	0	2	60	11	0	2	60	9	0	2	54	N	99	0.105	2
+Contig145_chr24_34778364_34778898	163	T	C	372.0	chr24	34778541	C	10	0	2	57	8	0	2	51	12	0	2	63	12	0	2	63	6	1	2	31	7	0	2	48	Y	40	0.037	0
+Contig34_chr24_36147443_36150244	2679	C	T	140.0	chr24	36150125	C	13	0	2	66	7	0	2	48	14	0	2	69	14	0	2	69	10	0	2	57	13	0	2	66	N	282	0.099	1
+Contig164_chr24_46598127_46599206	84	C	T	105.0	chr24	46598214	C	13	0	2	66	12	0	2	63	15	0	2	72	15	0	2	72	11	0	2	60	8	0	2	51	Y	22	1.262	1
+Contig144_chr25_4011170_4013134	541	A	G	160.0	chr25	4011690	A	12	0	2	63	17	0	2	78	13	0	2	66	13	0	2	66	13	0	2	66	13	0	2	66	Y	5	0.087	0
+Contig81_chr25_6103472_6104760	699	G	A	378.0	chr25	6104190	A	14	0	2	69	16	0	2	75	13	0	2	66	11	0	2	60	11	0	2	60	12	0	2	63	Y	33	0.789	2
+Contig152_chr25_7486442_7487609	75	A	G	11.6	chr25	7486515	A	17	0	2	78	13	0	2	66	8	0	2	51	16	0	2	75	8	0	2	51	6	0	2	45	N	2	0.158	0
+Contig24_chr25_7695778_7698612	2714	C	T	130.0	chr25	7698446	C	16	0	2	75	13	0	2	66	22	0	2	93	17	0	2	78	10	0	2	57	17	0	2	78	Y	27	0.346	0
+Contig89_chr25_8635170_8636009	586	G	C	209.0	chr25	8635744	G	13	0	2	66	13	0	2	66	21	0	2	93	14	0	2	69	15	0	2	72	15	0	2	72	Y	14	0.067	0
+Contig77_chr25_10796299_10796481	2	T	C	17.3	chr25	-1	N	1	0	2	30	0	0	-1	0	1	0	2	30	0	0	-1	0	0	0	-1	0	0	0	-1	0	N	-1	+99.	0
+Contig73_chr25_14177327_14177474	125	A	C	6.85	chr25	14177464	A	0	0	-1	0	0	0	-1	0	0	0	-1	0	0	0	-1	0	0	0	-1	0	0	0	-1	0	N	27	+99.	1
+Contig59_chr25_18196776_18197707	785	G	A	112.0	chr25	18197551	G	8	10	1	42	27	0	2	108	21	0	2	90	18	0	2	81	10	0	2	57	14	0	2	69	N	36	3.625	0
+Contig103_chr25_38891221_38892140	407	G	A	131.0	chr25	38891644	G	8	0	2	51	14	0	2	69	18	0	2	81	8	0	2	51	8	0	2	51	11	0	2	60	Y	149	0.167	4
+Contig84_chr25_42407960_42408708	55	C	T	119.0	chr25	42408013	C	6	0	2	45	9	0	2	54	11	0	2	60	9	0	2	54	7	0	2	48	8	0	2	51	Y	11	0.121	0
+Contig73_chr25_43562500_43564110	955	T	C	52.1	chr25	43563469	C	9	0	2	57	4	0	2	39	6	0	2	45	5	0	2	42	7	0	2	48	10	0	2	57	Y	4	1.406	0
+Contig37_chr25_51074433_51074885	170	A	G	102.0	chr25	51074589	G	11	0	2	60	7	0	2	48	6	0	2	45	15	0	2	72	9	0	2	54	7	0	2	48	Y	68	0.207	1
+Contig204_chr26_4311195_4311778	170	C	T	16.9	chr26	4311363	T	20	0	2	87	8	0	2	51	13	0	2	66	18	0	2	81	11	0	2	60	14	0	2	69	N	35	0.085	0
+Contig122_chr26_7622321_7623491	106	C	G	139.0	chr26	7622423	C	3	0	2	36	9	0	2	54	10	0	2	57	12	0	2	63	9	0	2	54	5	0	2	42	N	19	0.458	0
+Contig11_chr26_11062142_11062902	707	C	A	108.0	chr26	11062836	T	7	0	2	48	8	0	2	51	16	0	2	75	10	0	2	57	6	0	2	45	14	0	2	69	Y	-1	4.709	0
+Contig133_chr26_17695661_17696368	39	T	G	98.7	chr26	17695700	T	10	0	2	57	3	0	2	36	11	0	2	60	9	0	2	54	2	0	2	33	1	0	2	30	N	85	3.402	0
+Contig157_chr26_23894107_23895229	25	C	T	50.2	chr26	23894140	C	0	0	-1	0	4	0	2	39	2	0	2	33	4	0	2	39	3	0	2	36	3	0	2	36	Y	51	+99.	0
+Contig146_chr26_26622638_26623906	574	G	A	186.0	chr26	26623219	A	11	0	2	60	12	0	2	63	9	0	2	54	11	0	2	60	9	0	2	54	12	0	2	63	Y	1	0.318	0
+Contig8_chr26_27834126_27834326	140	G	A	41.7	chr26	27834268	G	13	0	2	66	7	0	2	48	13	0	2	66	11	0	2	60	12	0	2	63	6	0	2	45	N	29	0.142	1
+Contig78_chr26_31128839_31129005	123	T	C	145.0	chr26	-1	N	11	0	2	60	3	0	2	36	7	0	2	48	8	0	2	51	10	0	2	46	7	0	2	48	N	-1	1.230	1
+Contig28_chr26_32935355_32935833	289	T	C	77.9	chr26	32935638	T	15	0	2	72	22	0	2	93	15	0	2	72	9	0	2	54	15	0	2	72	17	0	2	78	Y	10	2.258	1
+Contig36_chr26_36606876_36607240	115	A	T	139.0	chr26	36606979	A	1	0	2	30	7	0	2	48	14	0	2	69	13	0	2	66	9	0	2	54	3	0	2	36	Y	8	0.071	0
+Contig135_chr27_6853874_6854079	158	C	T	116.0	chr27	6854032	T	18	0	2	81	19	0	2	84	13	0	2	66	7	0	2	48	8	0	2	51	11	0	2	60	N	4	0.060	1
+Contig47_chr27_11777710_11777915	25	A	G	67.3	chr27	11777731	A	3	0	2	36	5	0	2	42	6	0	2	45	10	0	2	57	9	0	2	54	6	0	2	45	N	97	+99.	0
+Contig23_chr27_14633002_14633153	23	G	A	128.0	chr27	14633023	A	3	0	2	36	4	0	2	39	5	0	2	42	5	0	2	42	3	0	2	36	2	0	2	33	N	240	3.881	0
+Contig31_chr27_14987233_14988055	630	A	G	48.5	chr27	14987850	G	10	0	2	57	2	0	2	33	4	0	2	39	4	0	2	39	1	0	2	30	4	0	2	39	Y	9	0.089	1
+Contig29_chr27_15428166_15429413	380	T	C	140.0	chr27	15428539	T	15	0	2	72	15	0	2	72	17	0	2	78	15	0	2	72	15	0	2	72	15	0	2	72	Y	47	0.916	1
+Contig31_chr27_19519489_19520891	129	G	T	14.9	chr27	19519624	T	12	0	2	63	19	0	2	84	20	0	2	87	16	0	2	75	10	0	2	57	11	0	2	60	Y	48	2.756	0
+Contig64_chr27_34654435_34654621	132	C	A	115.0	chr27	34654567	T	2	0	2	33	2	0	2	33	5	0	2	42	3	0	2	36	3	0	2	36	8	0	2	51	N	12	0.297	1
+Contig35_chr27_40596169_40596445	20	G	C	133.0	chr27	40596189	G	8	0	2	51	3	0	2	36	4	0	2	39	2	0	2	33	4	0	2	39	4	0	2	39	Y	4	+99.	1
+Contig85_chr27_45471750_45472022	211	G	A	53.1	chr27	45471964	G	18	0	2	81	10	0	2	57	15	0	2	72	0	13	0	36	16	0	2	75	14	0	2	69	N	75	2.502	1
+Contig131_chr28_6481806_6483783	138	C	T	36.2	chr28	6481953	C	12	0	2	63	12	0	2	63	20	0	2	87	11	0	2	60	10	0	2	57	12	0	2	63	Y	10	0.387	0
+Contig141_chr28_10027332_10028242	780	T	G	74.8	chr28	10028095	T	10	0	2	57	11	0	2	60	14	0	2	69	10	0	2	57	7	0	2	48	9	0	2	54	Y	19	3.348	0
+Contig144_chr28_15468203_15470548	743	G	A	20.0	chr28	15468942	G	13	0	2	66	12	0	2	63	10	0	2	57	11	0	2	60	16	0	2	75	7	0	2	48	N	14	0.053	0
+Contig47_chr28_21311718_21312366	541	G	A	116.0	chr28	21312258	G	9	0	2	54	6	0	2	45	12	0	2	63	6	0	2	45	5	0	2	45	12	0	2	63	N	9	0.240	0
+Contig60_chr28_30197166_30197364	92	T	C	164.0	chr28	30197258	T	10	0	2	57	13	0	2	66	15	0	2	72	16	0	2	75	12	0	2	63	11	0	2	60	N	369	1.139	0
+Contig201_chr28_36339953_36341322	260	C	T	6.36	chr28	36340213	T	4	0	2	39	0	0	-1	0	2	0	2	33	2	0	2	33	3	0	2	36	4	0	2	39	N	4	0.183	0
+Contig175_chr28_36441165_36441915	68	T	C	3.83	chr28	36441234	T	4	4	1	15	6	0	2	45	12	0	2	63	15	0	2	72	6	0	2	45	9	0	2	54	N	4	1.610	2
+Contig29_chr29_4726399_4727143	559	A	T	163.0	chr29	4726955	A	15	0	2	72	18	0	2	81	18	0	2	81	16	0	2	75	11	0	2	60	14	0	2	72	Y	161	3.114	0
+Contig48_chr29_13129286_13130137	232	A	G	92.2	chr29	13129514	G	13	0	2	66	11	0	2	60	19	0	2	84	16	0	2	75	11	0	2	60	17	0	2	78	Y	337	2.581	1
+Contig64_chr29_15736891_15737257	344	T	C	40.4	chr29	15737233	C	1	0	2	30	0	0	-1	0	0	0	-1	0	2	0	2	33	0	0	-1	0	0	0	-1	0	N	58	+99.	0
+Contig33_chr29_17000374_17000921	71	C	T	48.6	chr29	17000441	-	4	0	2	39	9	0	2	54	12	0	2	66	10	0	2	57	7	0	2	48	4	0	2	39	N	26	5.491	0
+Contig34_chr29_17581796_17584016	2105	C	T	126.0	chr29	17583890	T	14	0	2	69	11	0	2	60	18	0	2	81	12	0	2	63	10	0	2	57	10	0	2	57	Y	22	2.208	0
+Contig19_chr29_20976080_20977761	1007	G	A	115.0	chr29	20977076	G	19	0	2	84	22	0	2	93	22	0	2	93	22	0	2	93	11	0	2	60	13	0	2	66	Y	4	1.915	0
+Contig51_chr29_21149853_21150467	266	C	T	146.0	chr29	21150118	C	12	0	2	63	12	0	2	63	23	0	2	96	14	0	2	69	13	0	2	66	10	0	2	57	Y	4	0.051	0
+Contig1_chr30_5992217_5993068	106	C	T	129.0	chr30	5992319	C	10	0	2	57	11	0	2	60	7	0	2	48	11	0	2	60	10	0	2	57	12	0	2	63	Y	76	1.079	0
+Contig1_chr30_8232878_8233406	402	C	T	127.0	chr30	8233264	C	8	0	2	51	19	0	2	84	16	0	2	75	18	0	2	81	10	0	2	57	14	0	2	69	Y	358	5.283	0
+Contig108_chr30_9436961_9437520	546	C	T	39.8	chr30	9437502	C	7	0	2	48	5	0	2	42	2	0	2	33	7	0	2	48	5	0	2	42	7	0	2	48	Y	64	+99.	0
+Contig165_chr30_25804389_25804926	190	T	C	126.0	chr30	25804592	C	3	0	2	36	8	0	2	51	7	0	2	48	10	0	2	57	7	0	2	48	4	0	2	39	Y	113	0.329	0
+Contig193_chr30_27495616_27496125	434	C	A	234.0	chr30	27496024	C	13	0	2	66	16	0	2	75	25	0	2	102	16	0	2	75	13	0	2	66	14	0	2	69	Y	76	2.621	0
+Contig114_chr30_33636712_33637208	34	C	T	142.0	chr30	33636744	C	7	0	2	48	4	1	2	20	6	0	2	45	6	0	2	45	3	4	1	29	5	0	2	42	Y	14	8.028	0
+Contig38_chr31_5164423_5166573	2074	C	T	134.0	chr31	5166501	T	13	0	2	66	10	0	2	57	17	0	2	78	11	0	2	60	17	0	2	78	10	0	2	57	Y	58	+99.	0
+Contig6_chr31_9649308_9650149	431	G	T	162.0	chr31	9649742	G	31	0	2	120	23	0	2	96	17	0	2	78	17	0	2	78	10	0	2	57	16	0	2	75	Y	98	2.200	0
+Contig85_chr31_12242872_12245082	38	G	C	92.4	chr31	12242910	G	1	0	2	30	6	0	2	45	9	0	2	54	8	0	2	51	5	0	2	42	9	0	2	54	N	2	2.340	0
+Contig7_chr31_12384974_12386400	305	C	T	69.6	chr31	12385267	C	6	0	2	45	10	0	2	57	11	0	2	60	11	0	2	60	9	0	2	54	12	0	2	63	Y	44	1.165	0
+Contig90_chr31_17267583_17267778	81	C	A	143.0	chr31	17267665	C	20	0	2	87	6	0	2	45	14	0	2	72	22	0	2	93	17	0	2	78	15	0	2	72	N	7	0.565	0
+Contig68_chr31_20000241_20000597	215	C	T	131.0	chr31	20000454	T	0	0	-1	0	0	0	-1	0	0	0	-1	0	0	0	-1	0	0	0	-1	0	0	0	-1	0	Y	5	3.383	1
+Contig137_chr31_23357653_23358568	885	G	A	119.0	chr31	23358545	G	5	0	2	42	3	0	2	36	3	0	2	36	2	0	2	33	3	0	2	36	4	0	2	39	Y	11	+99.	0
+Contig17_chr31_26433828_26434459	498	T	C	9.79	chr31	26434322	T	18	0	2	81	10	0	2	57	15	0	2	72	13	0	2	66	16	0	2	75	15	0	2	72	Y	137	4.814	0
+Contig9_chr32_19479532_19479735	12	A	G	20.7	chr32	19479544	A	1	0	2	30	2	0	2	33	1	0	2	30	5	0	2	42	3	0	2	36	3	0	2	36	N	17	+99.	0
+Contig30_chr32_25902721_25905783	208	C	G	162.0	chr32	25902927	G	11	0	2	60	13	0	2	66	11	0	2	60	12	0	2	63	7	0	2	48	11	0	2	60	Y	145	0.322	2
+Contig7_chr32_27789513_27789926	20	G	A	7.19	chr32	27789530	A	0	0	-1	0	4	0	2	39	4	0	2	39	4	0	2	39	2	0	2	33	6	0	2	45	Y	14	+99.	0
+Contig42_chr32_38900713_38901320	320	A	G	134.0	chr32	38901021	T	12	0	2	63	10	0	2	57	9	11	1	104	5	0	2	42	19	0	2	84	7	6	1	56	Y	71	0.165	0
+Contig18_chr33_22207246_22209159	1363	G	T	51.5	chr33	22208619	-	16	0	2	75	8	0	2	51	11	0	2	60	10	0	2	57	15	0	2	72	12	0	2	63	Y	59	2.560	0
+Contig104_chr33_22483642_22484187	424	C	T	140.0	chr33	22484054	T	13	0	2	66	16	0	2	75	9	0	2	54	15	0	2	72	13	0	2	66	10	0	2	57	Y	36	0.404	0
+Contig170_chr33_26189421_26189940	292	T	C	98.4	chr33	26189703	T	21	0	2	90	13	0	2	66	15	0	2	72	13	0	2	66	19	0	2	84	13	0	2	66	Y	23	0.307	0
+Contig113_chr34_13341080_13341643	236	C	T	90.7	chr34	13341316	C	4	0	2	39	2	0	2	33	8	0	2	51	4	0	2	39	8	0	2	51	3	0	2	36	Y	47	0.412	3
+Contig405_chr34_14415672_14415979	59	A	G	36.2	chr34	14415731	G	8	0	2	51	2	0	2	33	8	0	2	51	6	0	2	48	3	0	2	36	7	0	2	48	Y	45	0.405	1
+Contig21_chr34_16422980_16425681	2009	G	A	19.4	chr34	16424960	G	0	0	-1	0	0	0	-1	0	0	0	-1	0	5	0	2	42	0	0	-1	0	0	0	-1	0	Y	28	0.196	0
+Contig41_chr34_16544482_16545449	46	T	C	102.0	chr34	16544523	T	5	0	2	42	11	0	2	60	6	0	2	45	0	2	0	3	7	0	2	48	8	0	2	51	Y	215	1.156	0
+Contig8_chr34_18474513_18475673	1122	C	A	129.0	chr34	18475628	A	8	0	2	51	15	0	2	72	13	0	2	66	17	0	2	78	13	0	2	66	6	0	2	45	Y	61	0.123	2
+Contig152_chr34_31794848_31795540	242	G	A	93.2	chr34	31795093	G	11	0	2	60	24	0	2	99	17	0	2	78	15	0	2	72	18	0	2	81	17	0	2	78	Y	123	2.780	0
+Contig28_chr34_41708848_41712034	1381	A	G	78.2	chr34	41710232	A	11	0	2	60	17	0	2	78	15	0	2	72	16	0	2	75	15	0	2	72	14	0	2	69	Y	236	0.234	0
+Contig85_chr34_42798284_42800584	1845	C	T	171.0	chr34	42800126	T	5	0	2	42	7	0	2	48	6	0	2	45	7	0	2	48	6	0	2	45	2	0	2	33	Y	5	2.787	0
+Contig47_chr35_3666773_3667898	348	G	T	124.0	chr35	3667121	G	9	0	2	54	20	0	2	87	18	0	2	81	15	0	2	72	12	0	2	63	14	0	2	69	Y	285	0.235	0
+Contig195_chr35_15722500_15722741	205	G	A	4.08	chr35	15722718	G	3	0	2	36	5	0	2	42	1	0	2	30	6	0	2	45	1	0	2	30	1	0	2	30	N	43	+99.	0
+Contig101_chr35_19513178_19513697	62	C	T	112.0	chr35	19513238	C	12	0	2	63	7	0	2	48	13	0	2	66	7	0	2	48	5	0	2	42	8	0	2	51	N	115	3.135	0
+Contig19_chr35_23887144_23888282	90	C	A	10.1	chr35	23887242	-	3	3	1	12	4	4	1	19	8	6	1	37	4	3	1	11	8	3	2	7	9	3	2	11	Y	105	0.199	0
+Contig47_chr35_24382042_24382526	33	G	A	87.0	chr35	24382076	G	5	0	2	42	4	0	2	39	6	0	2	45	7	0	2	48	4	0	2	39	2	0	2	33	Y	71	+99.	0
+Contig77_chr35_24796947_24797172	65	A	G	52.1	chr35	24797009	A	7	0	2	48	5	0	2	42	8	0	2	51	6	0	2	45	12	0	2	63	10	0	2	57	N	11	1.401	3
+Contig74_chr35_25394343_25394813	303	A	T	221.0	chr35	25394646	G	23	0	2	96	15	0	2	72	25	0	2	105	7	7	1	49	18	0	2	81	16	0	2	75	Y	58	4.298	0
+Contig5_chr36_4562983_4563634	343	C	T	151.0	chr36	4563324	T	20	0	2	87	20	0	2	87	23	0	2	96	24	0	2	99	9	0	2	54	8	0	2	51	Y	40	1.169	0
+Contig75_chr36_7885319_7885588	53	G	A	25.7	chr36	7885372	G	10	0	2	57	8	0	2	51	13	0	2	66	7	0	2	48	4	0	2	39	7	0	2	48	N	7	2.653	0
+Contig184_chr36_18956191_18958552	187	A	G	11.5	chr36	18956371	G	10	0	2	57	11	0	2	60	21	0	2	90	14	0	2	69	7	0	2	48	4	0	2	39	N	278	1.434	2
+Contig12_chr36_21557176_21557828	513	T	A	159.0	chr36	21557695	A	11	0	2	60	14	0	2	69	21	0	2	90	12	0	2	63	15	0	2	72	11	0	2	60	Y	55	0.222	0
+Contig2_chr36_22436067_22436794	653	C	T	73.0	chr36	22436730	C	11	0	2	60	16	0	2	75	13	0	2	66	11	0	2	60	21	0	2	90	21	0	2	90	Y	9	0.534	0
+Contig133_chr36_32954045_32955409	136	A	G	116.0	chr36	32954182	A	16	0	2	75	15	0	2	72	20	0	2	87	11	0	2	60	18	0	2	81	13	0	2	66	Y	74	3.772	1
+Contig53_chr37_6665763_6665919	116	C	T	111.0	chr37	6665875	C	9	0	2	54	9	0	2	54	5	0	2	42	9	0	2	54	8	0	2	51	10	0	2	57	N	15	10.875	1
+Contig42_chr37_9589176_9591269	252	G	A	25.1	chr37	9589430	G	10	0	2	40	13	0	2	66	18	0	2	81	21	0	2	90	9	0	2	54	17	0	2	78	N	67	1.170	2
+Contig2_chr37_17134963_17136513	1140	A	C	158.0	chr37	17136092	A	14	0	2	69	24	0	2	99	17	0	2	78	16	0	2	75	15	0	2	75	13	0	2	66	Y	12	0.053	1
+Contig18_chr37_17147806_17149851	291	T	G	112.0	chr37	17148084	T	4	6	1	45	16	0	2	75	17	0	2	78	14	0	2	69	22	0	2	93	13	0	2	66	Y	41	4.442	0
+Contig64_chr37_17606895_17607534	565	C	T	30.2	chr37	17607439	A	9	0	2	54	16	0	2	75	20	0	2	87	14	0	2	69	16	0	2	75	10	0	2	57	N	20	1.622	0
+Contig126_chr37_21587881_21590621	373	G	T	132.0	chr37	21588256	G	11	0	2	60	11	0	2	60	23	0	2	96	12	0	2	63	8	0	2	51	18	0	2	81	Y	12	0.549	0
+Contig2_chr37_31197993_31198256	182	C	T	39.6	chr37	31198171	T	6	0	2	45	10	0	2	57	7	0	2	48	9	0	2	54	10	0	2	57	12	0	2	63	N	2	0.595	0
+Contig46_chr37_31852376_31853555	825	A	G	111.0	chr37	31853191	G	19	0	2	84	14	0	2	69	15	0	2	72	7	0	2	48	8	0	2	51	16	0	2	75	Y	17	0.128	1
+Contig7_chr38_12217200_12218387	1163	A	T	44.4	chr38	12218353	A	11	0	2	60	13	0	2	66	17	0	2	78	10	0	2	57	11	0	2	60	11	0	2	60	Y	67	+99.	0
+Contig15_chr38_12282020_12282253	150	C	T	156.0	chr38	12282164	A	17	0	2	78	11	0	2	60	19	0	2	84	14	0	2	69	5	0	2	42	14	0	2	69	Y	26	2.952	1
+Contig4_chr38_14807432_14807747	275	A	G	36.5	chr38	14807715	G	1	0	2	30	2	0	2	33	2	0	2	33	4	0	2	39	1	0	2	30	0	0	-1	0	Y	28	+99.	1
+Contig6_chr38_16185744_16186110	325	A	G	74.9	chr38	16186061	A	5	0	2	42	3	0	2	36	9	0	2	54	7	0	2	48	1	0	2	30	12	0	2	63	Y	40	+99.	0
+Contig265_chrX_2689247_2689484	114	C	G	103.0	chrX	2689356	C	11	0	2	60	9	0	2	54	13	0	2	66	16	0	2	75	14	0	2	69	10	0	2	57	N	2	9.232	1
+Contig122_chrX_6026976_6027327	330	C	T	79.4	chrX	6027303	C	3	0	2	36	3	0	2	36	3	0	2	36	4	0	2	39	3	0	2	36	6	0	2	45	Y	30	+99.	0
+Contig15_chrX_15659909_15660340	15	A	C	14.9	chrX	15659924	C	1	0	2	30	1	0	2	30	3	0	2	36	6	0	2	45	2	0	2	33	0	0	-1	0	Y	216	+99.	1
+Contig12_chrX_23243561_23244412	479	C	G	67.7	chrX	23244037	C	2	0	2	33	4	2	2	8	2	6	1	43	7	0	2	48	6	0	2	45	4	0	2	39	Y	208	1.620	0
+Contig113_chrX_26287829_26288398	385	C	T	59.6	chrX	26288213	C	9	0	2	54	9	0	2	54	17	0	2	78	11	0	2	60	3	8	1	44	4	0	2	39	N	13	0.077	0
+Contig186_chrX_29118735_29118939	192	G	A	7.01	chrX	29118931	G	1	0	2	30	7	0	2	48	4	0	2	39	5	0	2	42	8	0	2	51	4	0	2	39	N	50	+99.	0
+Contig237_chrX_31256648_31257654	165	T	A	246.0	chrX	31256814	T	7	0	2	48	23	0	2	96	19	0	2	84	17	0	2	78	14	0	2	69	8	0	2	51	Y	37	1.481	0
+Contig25_chrX_40729418_40730089	332	C	T	31.2	chrX	40729745	C	0	0	-1	0	2	0	2	33	4	0	2	39	5	0	2	42	3	0	2	36	3	0	2	36	Y	34	0.212	0
+Contig90_chrX_57430715_57431566	548	C	T	116.0	chrX	57431266	T	9	0	2	54	18	0	2	81	13	0	2	66	14	0	2	69	8	0	2	54	7	0	2	48	Y	261	0.154	1
+Contig133_chrX_84833782_84834125	182	G	A	69.7	chrX	84833962	G	5	0	2	42	18	0	2	81	12	0	2	63	19	0	2	84	6	3	1	27	7	0	2	48	N	619	0.278	0
+Contig129_chrX_90586053_90586467	135	A	T	120.0	chrX	90586195	A	1	0	2	30	6	0	2	45	8	0	2	51	5	0	2	42	1	0	2	30	2	0	2	33	N	637	0.245	0
+Contig125_chrX_93319363_93320877	349	A	C	145.0	chrX	93319721	A	4	0	2	39	6	0	2	45	11	0	2	60	10	0	2	57	13	0	2	66	6	0	2	45	Y	59	1.686	0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/add_fst_column/add_fst_column.gd_snp	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,21 @@
+Contig113_chr5_11052263_11052603	28	C	T	38.2	chr5	11052280	C	1	2	1	12	3	2	1	10	5	0	2	42	2	1	2	13	3	0	2	36	8	0	2	51	Y	161	+99.	0	0.1636
+Contig215_chr5_70946445_70947428	363	T	G	28.2	chr5	70946809	C	4	0	2	39	0	5	0	12	9	0	2	54	6	0	2	45	3	3	2	1	9	0	2	54	N	43	0.153	0	0.3846
+Contig132_chr7_20426224_20428145	1815	A	G	28.3	chr7	20428041	A	11	1	2	43	12	0	2	63	19	0	2	84	23	0	2	96	14	0	2	69	10	0	2	57	N	11	0.264	0	0.0213
+Contig30_chr8_17147743_17147923	13	G	A	105.0	chr8	17147756	A	1	3	1	19	1	0	2	30	3	0	2	36	1	0	2	30	1	0	2	30	3	0	2	36	N	6	+99.	0	0.4286
+Contig44_chr8_71186368_71188207	1455	G	T	147.0	chr8	71187818	G	4	10	1	74	3	0	2	36	20	0	2	87	12	0	2	63	8	0	2	51	10	0	2	57	Y	88	0.036	0	0.4167
+Contig103_chr11_8844784_8845095	214	T	G	135.0	chr11	8844993	T	1	1	2	12	10	0	2	57	5	4	1	26	2	3	1	13	2	7	1	34	1	1	2	13	Y	75	0.731	0	0.2101
+Contig37_chr13_15910164_15910426	245	G	A	32.9	chr13	-1	N	3	4	1	41	4	0	2	39	3	0	2	36	4	0	2	39	3	0	2	36	10	0	2	57	N	-1	2.159	1	0.2222
+Contig50_chr17_12247973_12249183	889	G	T	47.6	chr17	12248878	G	0	1	2	9	8	0	2	51	9	2	2	21	7	2	2	21	15	0	2	72	0	3	0	9	Y	1	1.181	0	0.0150
+Contig159_chr22_7896450_7896974	109	G	C	151.0	chr22	7896570	G	16	0	2	75	5	7	1	62	14	0	2	69	16	0	2	75	13	0	2	66	13	0	2	66	Y	16	0.465	0	0.1429
+Contig77_chr22_49764414_49764875	353	C	A	148.0	chr22	49764777	C	7	4	1	65	18	0	2	81	16	0	2	75	20	0	2	87	4	3	1	52	9	4	1	67	Y	12	0.941	0	0.0741
+Contig61_chr24_30465488_30465834	149	G	T	68.2	chr24	30465637	G	13	0	2	66	4	2	2	11	18	0	2	81	11	0	2	60	11	0	2	60	9	0	2	54	N	99	0.105	2	0.0556
+Contig59_chr25_18196776_18197707	785	G	A	112.0	chr25	18197551	G	8	10	1	42	27	0	2	108	21	0	2	90	18	0	2	81	10	0	2	57	14	0	2	69	N	36	3.625	0	0.1250
+Contig85_chr27_45471750_45472022	211	G	A	53.1	chr27	45471964	G	18	0	2	81	10	0	2	57	15	0	2	72	0	13	0	36	16	0	2	75	14	0	2	69	N	75	2.502	1	0.3023
+Contig175_chr28_36441165_36441915	68	T	C	3.83	chr28	36441234	T	4	4	1	15	6	0	2	45	12	0	2	63	15	0	2	72	6	0	2	45	9	0	2	54	N	4	1.610	2	0.1667
+Contig114_chr30_33636712_33637208	34	C	T	142.0	chr30	33636744	C	7	0	2	48	4	1	2	20	6	0	2	45	6	0	2	45	3	4	1	29	5	0	2	42	Y	14	8.028	0	0.0435
+Contig42_chr32_38900713_38901320	320	A	G	134.0	chr32	38901021	T	12	0	2	63	10	0	2	57	9	11	1	104	5	0	2	42	19	0	2	84	7	6	1	56	Y	71	0.165	0	0.2821
+Contig41_chr34_16544482_16545449	46	T	C	102.0	chr34	16544523	T	5	0	2	42	11	0	2	60	6	0	2	45	0	2	0	3	7	0	2	48	8	0	2	51	Y	215	1.156	0	0.1429
+Contig19_chr35_23887144_23888282	90	C	A	10.1	chr35	23887242	-	3	3	1	12	4	4	1	19	8	6	1	37	4	3	1	11	8	3	2	7	9	3	2	11	Y	105	0.199	0	0.0051
+Contig74_chr35_25394343_25394813	303	A	T	221.0	chr35	25394646	G	23	0	2	96	15	0	2	72	25	0	2	105	7	7	1	49	18	0	2	81	16	0	2	75	Y	58	4.298	0	0.0986
+Contig18_chr37_17147806_17149851	291	T	G	112.0	chr37	17148084	T	4	6	1	45	16	0	2	75	17	0	2	78	14	0	2	69	22	0	2	93	13	0	2	66	Y	41	4.442	0	0.1304
+Contig12_chrX_23243561_23244412	479	C	G	67.7	chrX	23244037	C	2	0	2	33	4	2	2	8	2	6	1	43	7	0	2	48	6	0	2	45	4	0	2	39	Y	208	1.620	0	0.0256
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/average_fst/average_fst.txt	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,1 @@
+average Fst is 0.16461, using 21 SNPs
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/coverage_distributions/coverage.html	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,39 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <meta http-equiv="Content-type" content="text/html; charset=UTF-8" />
+    <link rel="stylesheet" href="/static/style/base.css" type="text/css" />
+    <title>Coverage distributions Galaxy Composite Dataset</title>
+  </head>
+  <body>
+    <div class="document">
+      Output completed: 2012-04-03 01:57:24 PM
+      <p/>
+      <div id="gd_outputs">
+        Outputs
+        <ul>
+            <li><a href="coverage.pdf">coverage.pdf</a></li>
+            <li><a href="coverage.txt">coverage.txt</a></li>
+        </ul>
+      </div>
+      <div id="gd_inputs">
+        Inputs
+        <ul>
+            <li>Data source: sequence coverage</li>
+        </ul>
+      </div>
+      <div id="gd_misc">
+        Individuals
+<ol>
+<li>PB1</li>
+<li>PB2</li>
+<li>PB3</li>
+<li>PB4</li>
+<li>PB6</li>
+<li>PB8</li>
+</ol>
+      </div>
+    </div>
+  </body>
+</html>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/coverage_distributions/coverage.pdf	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,363 @@
+%PDF-1.4
+%���ρ�\r
+1 0 obj
+<<
+/CreationDate (D:20120403135724)
+/ModDate (D:20120403135724)
+/Title (R Graphics Output)
+/Producer (R 2.11.0)
+/Creator (R)
+>>
+endobj
+2 0 obj
+<<
+/Type /Catalog
+/Pages 3 0 R
+>>
+endobj
+5 0 obj
+<<
+/Type /Page
+/Parent 3 0 R
+/Contents 6 0 R
+/Resources 4 0 R
+>>
+endobj
+6 0 obj
+<<
+/Length 7 0 R
+>>
+stream
+1 J 1 j q
+Q q 59.04 73.44 630.72 299.52 re W n
+1.000 0.000 0.000 RG
+2.25 w
+[] 0 d
+1 J
+1 j
+10.00 M
+82.40 174.26 m
+106.73 206.89 l
+131.07 206.89 l
+155.40 263.98 l
+179.73 263.98 l
+204.07 223.20 l
+228.40 312.93 l
+252.73 304.77 l
+277.07 255.83 l
+301.40 280.30 l
+325.73 312.93 l
+350.07 321.08 l
+374.40 255.83 l
+398.73 263.98 l
+423.07 231.36 l
+447.40 231.36 l
+471.73 174.26 l
+496.07 215.04 l
+520.40 174.26 l
+544.73 133.47 l
+569.07 157.95 l
+593.40 109.00 l
+617.73 109.00 l
+642.07 92.69 l
+666.40 84.53 l
+S
+Q q
+0.000 0.000 0.000 RG
+0.75 w
+[] 0 d
+1 J
+1 j
+10.00 M
+82.40 73.44 m 569.07 73.44 l S
+82.40 73.44 m 82.40 66.24 l S
+204.07 73.44 m 204.07 66.24 l S
+325.73 73.44 m 325.73 66.24 l S
+447.40 73.44 m 447.40 66.24 l S
+569.07 73.44 m 569.07 66.24 l S
+BT
+0.000 0.000 0.000 rg
+/F2 1 Tf 12.00 0.00 -0.00 12.00 79.06 47.52 Tm (0) Tj
+ET
+BT
+/F2 1 Tf 12.00 0.00 -0.00 12.00 200.73 47.52 Tm (5) Tj
+ET
+BT
+/F2 1 Tf 12.00 0.00 -0.00 12.00 319.06 47.52 Tm (10) Tj
+ET
+BT
+/F2 1 Tf 12.00 0.00 -0.00 12.00 440.73 47.52 Tm (15) Tj
+ET
+BT
+/F2 1 Tf 12.00 0.00 -0.00 12.00 562.39 47.52 Tm (20) Tj
+ET
+59.04 84.53 m 59.04 345.55 l S
+59.04 84.53 m 51.84 84.53 l S
+59.04 149.79 m 51.84 149.79 l S
+59.04 215.04 m 51.84 215.04 l S
+59.04 280.30 m 51.84 280.30 l S
+59.04 345.55 m 51.84 345.55 l S
+BT
+/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 72.86 Tm (0.00) Tj
+ET
+BT
+/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 138.11 Tm (0.02) Tj
+ET
+BT
+/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 203.37 Tm (0.04) Tj
+ET
+BT
+/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 268.62 Tm (0.06) Tj
+ET
+BT
+/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 333.88 Tm (0.08) Tj
+ET
+59.04 73.44 m
+689.76 73.44 l
+689.76 372.96 l
+59.04 372.96 l
+59.04 73.44 l
+S
+Q q
+BT
+0.000 0.000 0.000 rg
+/F2 1 Tf 12.00 0.00 -0.00 12.00 348.69 18.72 Tm [(Co) 15 (v) 25 (er) 10 (age)] TJ
+ET
+BT
+/F2 1 Tf 0.00 12.00 -12.00 0.00 12.96 195.28 Tm [(Propor) -40 (tion)] TJ
+ET
+Q q 59.04 73.44 630.72 299.52 re W n
+1.000 1.000 0.000 RG
+2.25 w
+[] 0 d
+1 J
+1 j
+10.00 M
+82.40 157.95 m
+106.73 166.10 l
+131.07 231.36 l
+155.40 215.04 l
+179.73 280.30 l
+204.07 263.98 l
+228.40 272.14 l
+252.73 231.36 l
+277.07 345.55 l
+301.40 321.08 l
+325.73 288.45 l
+350.07 329.24 l
+374.40 255.83 l
+398.73 280.30 l
+423.07 247.67 l
+447.40 239.51 l
+471.73 215.04 l
+496.07 157.95 l
+520.40 174.26 l
+544.73 166.10 l
+569.07 133.47 l
+593.40 92.69 l
+617.73 100.85 l
+642.07 100.85 l
+666.40 100.85 l
+S
+0.000 1.000 0.000 RG
+82.40 141.63 m
+106.73 166.10 l
+131.07 182.42 l
+155.40 182.42 l
+179.73 231.36 l
+204.07 198.73 l
+228.40 206.89 l
+252.73 263.98 l
+277.07 263.98 l
+301.40 263.98 l
+325.73 239.51 l
+350.07 280.30 l
+374.40 198.73 l
+398.73 304.77 l
+423.07 231.36 l
+447.40 247.67 l
+471.73 239.51 l
+496.07 239.51 l
+520.40 215.04 l
+544.73 198.73 l
+569.07 231.36 l
+593.40 149.79 l
+617.73 166.10 l
+642.07 166.10 l
+666.40 100.85 l
+S
+0.000 1.000 1.000 RG
+82.40 133.47 m
+106.73 133.47 l
+131.07 255.83 l
+155.40 231.36 l
+179.73 272.14 l
+204.07 272.14 l
+228.40 337.40 l
+252.73 280.30 l
+277.07 280.30 l
+301.40 280.30 l
+325.73 337.40 l
+350.07 288.45 l
+374.40 296.61 l
+398.73 223.20 l
+423.07 272.14 l
+447.40 255.83 l
+471.73 239.51 l
+496.07 190.57 l
+520.40 117.16 l
+544.73 125.32 l
+569.07 149.79 l
+593.40 109.00 l
+617.73 109.00 l
+642.07 92.69 l
+666.40 92.69 l
+S
+0.000 0.000 1.000 RG
+82.40 157.95 m
+106.73 190.57 l
+131.07 215.04 l
+155.40 288.45 l
+179.73 231.36 l
+204.07 272.14 l
+228.40 272.14 l
+252.73 280.30 l
+277.07 296.61 l
+301.40 361.87 l
+325.73 329.24 l
+350.07 329.24 l
+374.40 296.61 l
+398.73 272.14 l
+423.07 215.04 l
+447.40 239.51 l
+471.73 190.57 l
+496.07 157.95 l
+520.40 166.10 l
+544.73 125.32 l
+569.07 100.85 l
+593.40 92.69 l
+617.73 109.00 l
+642.07 84.53 l
+666.40 92.69 l
+S
+1.000 0.000 1.000 RG
+82.40 198.73 m
+106.73 157.95 l
+131.07 215.04 l
+155.40 215.04 l
+179.73 304.77 l
+204.07 223.20 l
+228.40 321.08 l
+252.73 361.87 l
+277.07 280.30 l
+301.40 280.30 l
+325.73 329.24 l
+350.07 280.30 l
+374.40 337.40 l
+398.73 231.36 l
+423.07 272.14 l
+447.40 223.20 l
+471.73 174.26 l
+496.07 198.73 l
+520.40 149.79 l
+544.73 117.16 l
+569.07 100.85 l
+593.40 109.00 l
+617.73 100.85 l
+642.07 84.53 l
+666.40 100.85 l
+S
+1.000 0.000 0.000 rg
+0.000 0.000 0.000 RG
+0.75 w
+[] 0 d
+642.24 362.16 8.64 -7.20 re B
+1.000 1.000 0.000 rg
+642.24 347.76 8.64 -7.20 re B
+0.000 1.000 0.000 rg
+642.24 333.36 8.64 -7.20 re B
+0.000 1.000 1.000 rg
+642.24 318.96 8.64 -7.20 re B
+0.000 0.000 1.000 rg
+642.24 304.56 8.64 -7.20 re B
+1.000 0.000 1.000 rg
+642.24 290.16 8.64 -7.20 re B
+BT
+0.000 0.000 0.000 rg
+/F2 1 Tf 12.00 0.00 -0.00 12.00 661.68 354.25 Tm (PB1) Tj
+ET
+BT
+/F2 1 Tf 12.00 0.00 -0.00 12.00 661.68 339.85 Tm (PB2) Tj
+ET
+BT
+/F2 1 Tf 12.00 0.00 -0.00 12.00 661.68 325.45 Tm (PB3) Tj
+ET
+BT
+/F2 1 Tf 12.00 0.00 -0.00 12.00 661.68 311.05 Tm (PB4) Tj
+ET
+BT
+/F2 1 Tf 12.00 0.00 -0.00 12.00 661.68 296.65 Tm (PB6) Tj
+ET
+BT
+/F2 1 Tf 12.00 0.00 -0.00 12.00 661.68 282.25 Tm (PB8) Tj
+ET
+Q
+endstream
+endobj
+7 0 obj
+4763
+endobj
+3 0 obj
+<<
+/Type /Pages
+/Kids [
+5 0 R
+]
+/Count 1
+/MediaBox [0 0 720 432]
+>>
+endobj
+4 0 obj
+<<
+/ProcSet [/PDF /Text]
+/Font <</F2 9 0 R >>
+/ExtGState << >>
+>>
+endobj
+8 0 obj
+<<
+/Type /Encoding
+/BaseEncoding /WinAnsiEncoding
+/Differences [ 45/minus 96/quoteleft
+144/dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent
+/dieresis /.notdef /ring /cedilla /.notdef /hungarumlaut /ogonek /caron /space]
+>>
+endobj
+9 0 obj <<
+/Type /Font
+/Subtype /Type1
+/Name /F2
+/BaseFont /Helvetica
+/Encoding 8 0 R
+>> endobj
+xref
+0 10
+0000000000 65535 f
+0000000021 00000 n
+0000000164 00000 n
+0000005129 00000 n
+0000005212 00000 n
+0000000213 00000 n
+0000000293 00000 n
+0000005109 00000 n
+0000005293 00000 n
+0000005550 00000 n
+trailer
+<<
+/Size 10
+/Info 1 0 R
+/Root 2 0 R
+>>
+startxref
+5646
+%%EOF
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/coverage_distributions/coverage.txt	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,18 @@
+
+            0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19
+       PB1  2  6 10 15 21 25 32 39 44 50 57 64 70 75 80 84 87 91 94 95
+       PB2  2  4  9 13 19 24 30 35 43 50 56 64 69 75 80 85 89 91 94 96
+       PB3  1  4  7 10 14 18 22 27 33 38 43 49 52 59 64 69 73 78 82 86
+       PB4  1  3  8 12 18 24 32 38 44 50 57 64 70 74 80 85 90 93 94 96
+       PB6  2  5  9 15 20 26 31 37 44 52 60 67 74 80 84 88 92 94 96 98
+       PB8  3  5  9 13 20 24 32 40 46 52 60 66 73 78 84 88 91 94 96 97
+
+
+           20 21 22 23 24
+       PB1 97 98 99 99 99
+       PB2 98 98 98 99 99
+       PB3 90 92 95 97 98
+       PB4 98 98 99 99 99
+       PB6 98 98 99 99 99
+       PB8 98 98 99 99 99
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/dpmix/dpmix.html	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,56 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <meta http-equiv="Content-type" content="text/html; charset=UTF-8" />
+    <link rel="stylesheet" href="/static/style/base.css" type="text/css" />
+    <title>dpmix Galaxy Composite Dataset</title>
+  </head>
+  <body>
+    <div class="document">
+      Output completed: 2012-04-03 02:22:23 PM
+      <p/>
+      <div id="gd_outputs">
+        Outputs
+        <ul>
+            <li><a href="dpmix.pdf">dpmix.pdf</a></li>
+            <li><a href="misc.txt">misc.txt</a></li>
+        </ul>
+      </div>
+      <div id="gd_inputs">
+        Inputs
+        <ul>
+            <li>Data source: sequence coverage</li>
+            <li>Switch penalty: 10</li>
+            <li>Also analyze random chromosome: no</li>
+        </ul>
+      </div>
+      <div id="gd_misc">
+        Populations
+<ul>
+<li>
+Ancestral population 1
+<ol>
+<li>PB1</li>
+<li>PB2</li>
+</ol>
+</li>
+<li>
+Ancestral population 2
+<ol>
+<li>PB3</li>
+<li>PB4</li>
+</ol>
+</li>
+<li>
+Potentially admixed
+<ol>
+<li>PB6</li>
+<li>PB8</li>
+</ol>
+</li>
+</ul>
+      </div>
+    </div>
+  </body>
+</html>
Binary file genome_diversity/test-data/test_out/dpmix/dpmix.pdf has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/dpmix/dpmix.tabular	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,78 @@
+chr1	0	125154818	0	PB6
+chr1	0	125154818	0	PB8
+chr2	0	85243509	0	PB6
+chr2	0	85243509	0	PB8
+chr3	0	92410450	0	PB6
+chr3	0	92410450	0	PB8
+chr4	0	75619257	0	PB6
+chr4	0	75619257	0	PB8
+chr5	0	90203461	0	PB6
+chr5	0	90203461	0	PB8
+chr6	0	74848993	0	PB6
+chr6	0	74848993	0	PB8
+chr7	0	55833450	0	PB6
+chr7	0	55833450	0	PB8
+chr8	0	71187818	0	PB6
+chr8	0	71187818	0	PB8
+chr9	0	39008708	0	PB6
+chr9	0	39008708	0	PB8
+chr10	0	59511126	0	PB6
+chr10	0	59511126	0	PB8
+chr11	0	53408638	0	PB6
+chr11	0	53408638	2	PB8
+chr12	0	71364712	0	PB6
+chr12	0	71364712	0	PB8
+chr13	0	66022136	0	PB6
+chr13	0	66022136	0	PB8
+chr14	0	56768832	0	PB6
+chr14	0	56768832	0	PB8
+chr15	0	45107015	0	PB6
+chr15	0	45107015	0	PB8
+chr16	0	49888550	0	PB6
+chr16	0	49888550	0	PB8
+chr17	0	61714821	2	PB6
+chr17	0	61714821	0	PB8
+chr18	0	58130413	0	PB6
+chr18	0	58130413	0	PB8
+chr19	0	56559549	0	PB6
+chr19	0	56559549	0	PB8
+chr20	0	46551277	0	PB6
+chr20	0	46551277	0	PB8
+chr21	0	43475551	0	PB6
+chr21	0	43475551	0	PB8
+chr22	0	62406302	0	PB6
+chr22	0	62406302	0	PB8
+chr23	0	48285470	0	PB6
+chr23	0	48285470	0	PB8
+chr24	0	46598214	0	PB6
+chr24	0	46598214	0	PB8
+chr25	0	51074589	0	PB6
+chr25	0	51074589	0	PB8
+chr26	0	36606979	0	PB6
+chr26	0	36606979	0	PB8
+chr27	0	45471964	2	PB6
+chr27	0	45471964	2	PB8
+chr28	0	36441234	0	PB6
+chr28	0	36441234	0	PB8
+chr29	0	21150118	0	PB6
+chr29	0	21150118	0	PB8
+chr30	0	33636744	2	PB6
+chr30	0	33636744	0	PB8
+chr31	0	26434322	0	PB6
+chr31	0	26434322	0	PB8
+chr32	0	38901021	2	PB6
+chr32	0	38901021	0	PB8
+chr33	0	26189703	0	PB6
+chr33	0	26189703	0	PB8
+chr34	0	42800126	2	PB6
+chr34	0	42800126	2	PB8
+chr35	0	25394646	2	PB6
+chr35	0	25394646	2	PB8
+chr36	0	32954182	0	PB6
+chr36	0	32954182	0	PB8
+chr37	0	31853191	0	PB6
+chr37	0	31853191	0	PB8
+chr38	0	16186061	0	PB6
+chr38	0	16186061	0	PB8
+chrX	0	93319721	2	PB6
+chrX	0	93319721	2	PB8
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/dpmix/misc.txt	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,11 @@
+state 2 agrees with: PB1 PB2
+state 0 agrees with: PB3 PB4
+
+PB6: 360 SNPs where state 2 is as likely as state 0
+PB6: 12 SNPs where state 0 is more likely than state 2
+
+PB8: 358 SNPs where state 2 is as likely as state 0
+PB8: 14 SNPs where state 0 is more likely than state 2
+
+PB6: 0 = 83.7%, 1 = 0.0%, 2 = 16.3%
+PB8: 0 = 87.6%, 1 = 0.0%, 2 = 12.4%
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/evaluate_population_numbers/evaluate_population_numbers.txt	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,2 @@
+CV error (K=1): 0.07423
+CV error (K=2): 0.07708
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/extract_primers/extract_primers.txt	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,1265 @@
+> Contig161_chr1_4641264_4641879 115 C T 0.323016
+
+  1 TCCGAACCGCTAAATCCTGACGACTGTTCAGTGAGAACGGGnTTCCAGCTCAGTGGAGAC
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 ACTCAGAGCTTATGTGATGCACCGTCGTGCCCGTGTCTGACTAAATGTGTTGCCAGAGAA
+                                                            <<<<
+
+121 CAAAACGAAAGCCCCTATT
+    <<<<<<<<<<<<<<<<
+
+> Contig86_chr1_30984450_30985684 670 C T 0.031427
+
+  1 TAATTCATGACGACTGCAGAAGGGCACTCAGAGGCAATTCTACTTGAGGATATTGTCTGG
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 TATACTCTGTCCTTGCTCAGGACATCAGTGAGAACATAGAAACATTCACnTCCCCACACC
+
+
+121 GAAAGCGTCTGTAGACCGGCCCACGGGCCGAAGTCTTTGCATTTCCTCTTGCCATGCACG
+
+
+181 AGCATTCCCAGTGGCAATCAGGGGCCAGCCCTTCTGTTTGGCCTCTGCAAGCTTGTATCC
+                                            <<<<<<<<<<<<<<<<<<<<
+
+241 TTG
+
+
+> Contig21_chr1_60697952_60699446 307 G A 0.507396
+
+  1 TCTGGGGCCATGTTTCTGAAGTAAGGCTGTTTCTGCAGCCTTGCGGGCTGTGTCTTGCTC
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 nCACCCCTTAATTCTTACCTGTAGGTGGTATTTGGTAGAGTGGAGTAAAACTGGAAACTG
+                                                              <<
+
+121 GTTCTCTGTGTTCCTGCATCT
+    <<<<<<<<<<<<<<<<<<
+
+> Contig64_chr1_87343284_87345672 163 T A 0.038702
+ VspI
+  1 ATGGCCAATTCTGGTTTAcGCATCATTGTTAACAACTCTTCCATTCATTCTCAGAATTTT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 CCCAATTCACATGATAAATTGTATGGTCACCTACcTACAACTAAACACTTAGTTTATTTC
+
+
+121 TATTATTATTATTATTATTATTATTATTATTAnTAtTATTATTGAAATACATTTTTTTTT
+
+
+181 CATAAACCGTTCACcCTTGTGAGAAC
+       <<<<<<<<<<<<<<<<<<<<
+
+> Contig20_chr1_110679280_110679687 181 C T 0.659726
+
+  1 GAGCACTCAATGAGGGGTTCGACCCTTTGCAGACACAGCATGTAGGAGGAAGAAATGCAA
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 cGGGGCACCCCTGCGGGGGCAGGCTTCCAGTTCAAACTGATCnGGTCTGGTCCTGGGGCC
+
+
+121 GGGCCAAAGTTGTGGTTTCcCGCACTCAAGTCTCCAC
+                  <<<<<<<<<<<<<<<<<<<<
+
+> Contig222_chr2_9817738_9818143 220 C T 0.092668
+ SpeI
+  1 AGATTTAGCTGGAGCATGCCTTTGCCCTTTTTAGCCTTTCCCTTTTACCTTTATCCTTCT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 TATTCTTGAAATGTTGAAATAGATGGAAGTATAGCAGCTATCTTGTCCCATAATGATGAA
+
+
+121 AACCAGGTACAAAGTTGGTGAAAACTAAAAGAGAGGAGGAGCCTGGGTTCTTGGTGGCAT
+
+
+181 CATGAACACCTGCACnAGTCTAGCATGGTCTGTGCAAAATCTCCTGATCCAAGAAAAATA
+
+
+241 TAAACATCCTTCTGTAGGGTTTTATTgCCTGAAGCAAAA
+                    <<<<<<<<<<<<<<<<<<<<
+
+> Contig47_chr2_25470778_25471576 126 G A 0.289103
+ Bsp1286I
+  1 GCCAGGCGTCCCTCTTTTTGAGTTCtAATTGTGTACATCCAATCCCCATCTCAACAAATA
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 GCTGAACCAGCTTCCTaTTTATTTGGTAGGTnAGCACTCTAGAAATTTGCTACACTGAAC
+
+
+121 TCACCAAATTTATAATGTaAATTATGACCATTCTTTGCCATAATAATTTGGGGTAGGTCA
+
+
+181 GATTTGGTTTTGGGGGCAGAAGAAATCATCATATCACAAGCATGTGACAGCTTCCAGCCC
+                                                             <<<
+
+241 CATCTCAACTCCAAGAAATT
+    <<<<<<<<<<<<<<<<<
+
+> Contig6_chr2_56859179_56859956 671 T C 5.308026
+ MspA1I
+  1 TATCCCAAAGACGTGTGTCTCAAAGCCCTGAGGTTTACAGCCAAACATGATGGACTGCCC
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 ATGACAAcGGATACAAATGCTAGCgTGGGTTTAATTATGCTAGAATTTTTATGATAATTA
+
+
+121 TAATGATATTGTTATGAAGTATGCTAGGCTTTnAGCGGCTAGTCTCTAAACCTATTTTCC
+
+
+181 tTATAAATCCTTTTATTTTTAGTGCACTATTTTATAGAATAAGAGGTTTTTCAGGAACAC
+                                                <<<<<<<<<<<<<<<<
+
+241 ATATATTGCATT
+    <<<<<<<<<
+
+> Contig163_chr2_76402959_76404830 221 C T 0.178077
+
+  1 GCCCCTTCGAGTCCATCTTaCGCgCAGCAGCAGGAGGGATGGTCCCAACCACAAACCTAC
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 CCGCTGCCTGAACGCTTnAAGTGCCCTCCGAAGAAAGCCCAACTCCACAGCCTGGCAACT
+
+
+121 GAGGTCCTTGTGATCTTAGCTTCCTCTGCCCCACTCCACAGCTCAGCCTCACCgGACTCC
+
+
+181 CGAGCTCCTTAAAGGAGCCCCCGAGCCCCCGCACATGCTGTTCCCTGTAACCGGGTACTC
+
+
+241 CACGGCTCGTCTGTCCTTGGAGGCTCAGCTG
+            <<<<<<<<<<<<<<<<<<<<
+
+> Contig56_chr3_17326225_17327548 387 G C 0.224947
+ AgeI,HpaII,MspI
+  1 CAAAGGCAGTGATATGGGAGTGGAATGGAGAGGATGGGTGCCCCAGACTGGGTGCAGATC
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 TGTTCTATCTGGTGTTTGGTGGCTGACCATACnGGTGAGAAGAAGTGTcCAGGTTTCTGG
+
+
+121 CTTGATGATGCCGACAGTTATGGCAGGAAATGCTGAAGGGGTGCACATGAGCTCCTGTTC
+
+
+181 ATTCTTCACTCTTCCTCTTCTACCTCCAACCTTGCTACCTGTGTGTACCCGACTC
+                                    <<<<<<<<<<<<<<<<<<<<
+
+> Contig108_chr3_46210055_46210874 367 A G 0.027845
+
+  1 TTCACTCACCTGCTTCCCTGCTAACTGTCACCGCCCTCCCAATGCCTTAAACCAGCTTAG
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 AAACACAAAATTTAAAAAACATTATGTTGAGACAAAAATATGTATAACCTGGAATATTGA
+
+
+121 ATAACAAAATGAAAGGGAAAATGATTCAAGAACACTTGGATAAGGAAAACTACAAATATT
+
+
+181 nAAGATGTACCTTTGAACTTCCTATCACTGAAAGCAACCATGGAACCAGTACAATGTAGA
+
+
+241 CCTTCTGATCTGACTTTCTTTTGTCTCTTGCTGCTGGGAAGTAGAATGCCCC
+                                 <<<<<<<<<<<<<<<<<<<<
+
+> Contig1_chr3_51588422_51589409 926 A G 1.147200
+
+  1 AGATTATGGCCTGTGTTTACcCCAGCCTcGCAGAACATTTTACTGGGGACACCTGCCAGG
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 TGGCAGATCAGAAGCCCGTGAGGCAGCCAGCCAATGGGAtGGCCAAAACCTAGGGCTTCG
+
+
+121 TAnGGGAGGGAGATGTTTTCCTCgTCCCTCT
+            <<<<<<<<<<<<<<<<<<<<
+
+> Contig65_chr3_80727952_80728283 39 T C 7.077725
+
+  1 CAAAGGCTTACTTTTTaGATCAACACTCTAAATTCTTAAnAAACAACAAAGCCAAATTTT
+    >>>>>>>>>>>>>>>>>>>>>>>>>>
+
+ 61 CCTATATCATTGAGTAGTTGATACGTCTTTGGTTTTGCGCTAGCAGT
+                            <<<<<<<<<<<<<<<<<<<<
+
+> Contig134_chr4_12145648_12148225 1326 C T 0.079565
+ BalI
+  1 AACCCAGAtCAGAAACGTCCCATGGCTAGTCATCTTCCTACACAGACTTCTgAGAGCCAA
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 GCATCGTCAAcCGGCCAtTCTnGGCCATTCTCCCGAGCAGATGCTGCCGGGATAATCTGC
+
+
+121 AGCATGAAGCCCTCCCTCGGGGGAGACCCGACcgGGTCCACACAGGTCTGTcTAGC
+                                     <<<<<<<<<<<<<<<<<<<<
+
+> Contig19_chr4_26233601_26233991 146 G C 0.163005
+ DpnI,MboI,Sau3AI
+  1 AATTTGGCTTCCTCTGGAGTtGTCCCTTAATGCTAGGTATCAAGTGCTGACAGGCCACAG
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 ATnAGGGTAACACATGATTACAGGGCAACACACTGTAACACGTATTCCCTTGCCTTGTCT
+                                          <<<<<<<<<<<<<<<<<<<<
+
+121 T
+
+
+> Contig17_chr4_61310346_61311158 267 C T 0.097708
+
+  1 TATTCCAGACCAACCAAAAGGTCTAAGGAATAATAGAAGCTTCACCCACAGACCTGCCAC
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 CCAACTTGAGAAACAGCACTTGCTTCCTCATAGAGTCGAAACGTCTTCGGTGGGTCCCCT
+
+
+121 CCTGAAGCATCACCGCTACCTTTCCTCTTGGGAGTCACTGCCACCCnGAACTTGTTGCTG
+
+
+181 CTTATTCTCTTTTATTTTTCTTGTTTTTGAAAGAACCCTGTCTTGGGTGTTAGGATAC
+                                       <<<<<<<<<<<<<<<<<<<<
+
+> Contig31_chr5_4734956_4736547 1166 C T 0.020932
+
+  1 TGTTCTGCCATGCACACTTCTTCAACCCTTCAACCTGTGGGAGTCACCTCACATTCCCAC
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 AGcGAATGGAATATCTATCTATCTgnCTTTAGGGATTTGTTACGTTTTCTTTTTCTTCCT
+
+
+121 TTTCCTTCCAATATCTTAATGGGCAATTTTGTGGACAGTTGATAGAGACAACGTCAGGAG
+                                                         <<<<<<<
+
+181 CTGTTGGCCTAGTAAA
+    <<<<<<<<<<<<<
+
+> Contig6_chr5_26899813_26900498 97 A C 7.369943
+ AvaII,Sau96I,SinI
+  1 AACTGAAAGTGAGAATTCTTTGTATTTGCTAGTCAAAAGGATTTCTAAGTCAAAAAAGTA
+       >>>>>>>>>>>>>>>>>>>>>>>>>>
+
+ 61 ATTTGGGAnCATTAAGTCATATTTATAGACTAAAATTTCATTCCTAAAGACAATTTAGTA
+
+
+121 AAAATGCTAGGCTTTCTAGAAATTTAACCTAACATAAAAAATTACAGTAAGTTTGCTAAA
+
+
+181 GAATCACAGAGTTGACTGACAGTTTCCCAGGTTA
+               <<<<<<<<<<<<<<<<<<<<
+
+> Contig45_chr5_50892738_50892968 169 C A 0.496871
+
+  1 TGAAAGGGGCACTGGGAATTATCAGAACCTTCTGGGTAATTAAACTGGGGAAAGCATAAT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 ACCATTTAGAAAAAGTTCAAGTGAGTCTTTTCCTTATTCTCCCnTGTACCCAGAAAAACC
+                                                     <<<<<<<<<<<
+
+121 TGGACATGGTAC
+    <<<<<<<<<
+
+> Contig45_chr5_76133561_76134403 388 A G 0.038045
+
+  1 CATGAGCATGCTGTCTGCACAAtGGGAGCACCCGTGATGTGAGAGTAGCCAGGCCACCCT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 GGCTTGAGTGCTTTGTCCAAAAGGCACAATGGGAACTACACAGAAACAATCAGATTCACT
+
+
+121 GCCTTCGAGGGTTTGAAGAAGACAGCTGAAGAGTAGGAGGTAGAAnCAAAAAGGCATGAG
+
+
+181 AGGGGGAAGCAGAGGCTGCAAGACATGAGCTGGGCAGTACTGACgGGCCACACAGAGCAC
+                                                               <
+
+241 TGGAGACAAGGTCAGGAGCCCT
+    <<<<<<<<<<<<<<<<<<<
+
+> Contig111_chr6_5821219_5822519 1060 A G 0.230765
+ AvaI
+  1 CGTCAGAGCTGTCTTCCCTCCAGCCAGAGGGGCCCTGAGAAGGAAGGGGGCTGAACCCAG
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 GCgCCAGCCCAAGCTGCAGCGTGATCTGGGGGTGAGGCCCCCCGCTGCACAGGGGGCACG
+
+
+121 GGGGTTCGGGCAGAGATCGGCTACCCATGGCCGGCGAGGCCACAgTGGCAATGGGCAGCC
+
+
+181 AGCCTCCGACCAGCcGCCCCCnAGCTGCCTATTTAAGTCAGGAGCTTCTCCTTCCCgTGG
+                                                         <<<<<<<
+
+241 AAGTAGAGGACAAATT
+    <<<<<<<<<<<<<
+
+> Contig102_chr6_30271329_30271577 39 T G 1.158547
+
+  1 TCTTCCTTTATGCATCAGGGCAGCACCCTGGGGAGAAGnGGGGGGGACAcGTGTGTCCTG
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 GGGAAAGGGGTGTTCCCACTCCCTGCAATGCCTTCCCCCGCCCAGACCAGCAGTTCTCAG
+
+
+121 TCTTGACTGCATGGACTCTCCTGGAAGGCTTTAAAAAATGTGGAGGCCGAGGCTTACCCA
+
+
+181 tGACGGTTCTGACTGAATTGCTCTGGAGTAGGGCTTAGGCACTG
+                         <<<<<<<<<<<<<<<<<<<<
+
+> Contig112_chr6_51024554_51024851 100 A G 4.286925
+
+  1 CTTCATCATACCTATCATTGCCTATCGTTATACTATAGAGGTATTGTTCATTCTTTTTTA
+       >>>>>>>>>>>>>>>>>>>>>>>>
+
+ 61 TAGACTCATTGAGTAAAACTCAGGnCATGAGGGAAGGAACTTTGTCTCTTGTGCAATTCC
+
+
+121 CTATCCTCAGTCCTTAAATATATGTATGCTAcCCAATAGGCACCAAATAT
+                               <<<<<<<<<<<<<<<<<<<<
+
+> Contig84_chr7_6648683_6650255 1297 G A 0.165637
+
+  1 GTTAGTTGTGAACACTCCCCAGGTAAACTGGTGTAACTCTTGGGGCAAAGCATGGAGTCT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 ACCCAAAAATGTAGAATTCTGCAGAGACAGCTGTTTCTTGGTTGGGTTTCTAGACCAGAA
+
+
+121 AATAGAAGATTATAATTATGGGTGGGAAATATATGTGCAAAAAAGTATAAAAGAAGAGGA
+
+
+181 ACAGAATAAAnGGAAATGGAAATGTTTGTAATTGATAGGGATGTGGATGTAAATGCCTGG
+                                                    <<<<<<<<<<<<
+
+241 CAGAGAGGAGG
+    <<<<<<<<
+
+> Contig206_chr7_26281823_26282074 103 C A 0.947486
+ NheI
+  1 ATCCACATTCGCACAGCTCCTAATATAATATTTCATTGTTAAAATACTTCTGATTGGCCT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 AGGACATATTTTTACAACTGCCTTGACTTCAATTGCTAGnAGTAGCTTGCCAAAGAgGTG
+
+
+121 CTTTAATAAAGGAAATTAACTTCTTTTAATATGTTGACTGATATACCAAGGTTTTAGTGC
+
+
+181 TATTAGTTTACCTTCCCCAAAAGTGCTTA
+          <<<<<<<<<<<<<<<<<<<<
+
+> Contig38_chr7_50681997_50682600 42 T C 0.145997
+
+  1 TAGAGCTCTCAGCATCCAAGCAGAATCTACTGGGTCTGACTGnGTTCTGCTCTGTCACTG
+     >>>>>>>>>>>>>>>>>>>>
+
+ 61 GAATGACATTTCATTGCAGAGTACTCCTGCAGTACAACCAGGGCACAGCCTTTAAATTGA
+
+
+121 CCATGTCCCCTGGTCTaCTCTGCTGAGCTaTGCACGGGTCCCTTCTGGTTCAAACACAGA
+
+
+181 CTGATACAGCTCAGATGGAAGGGAGGCAGTTGCAGAGAAACAAA
+                         <<<<<<<<<<<<<<<<<<<<
+
+> Contig91_chr8_12804505_12805470 409 C A 0.175272
+
+  1 CTGTTTTCAGGGGCTACCTGCTATCTCCAGAACATGCCTGGCTCTCCTCCAAACACTGTT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 CAAnCTGACCAAAGCAGAGAGCTGTATATGGACCACACATACCAAAAAAAAAAAAAAGAC
+
+
+121 AGTCCACACCCTCTGTATAATTATATGGTACAAATAATAGAGTTTTTGTTAACTACCAGC
+
+
+181 TCTTTTTACAAAGCCTATCAAgTATCATAGACAGTATAATGCTGTGATTGCATCTGTGAA
+                                           <<<<<<<<<<<<<<<<<<<<
+
+241 CC
+
+
+> Contig8_chr8_27811135_27812620 333 C T 0.272485
+
+  1 CTTCAAGGAAAGGAGGCAGTTTGGACAAGTCAAAAAAATCCCAAAACtTTGTACTATATA
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 AATCTGGCATATTTGTTGATGACanAATTGAGTTAGAAGCAAGAGTCAGAAGCTGACTTT
+
+
+121 CATGCTGTTTTTCTGTTGTTTTCTGCGGCTCCCCTATGTACTAGTTCTCTTCCgGTGTGC
+
+
+181 TGACAACTTCCAACTTcTCATAcCCTCTGCATTTCACGTTCTGC
+                         <<<<<<<<<<<<<<<<<<<<
+
+> Contig17_chr8_57490059_57490498 69 G T 0.522227
+ BglII,DpnI,MboI,Sau3AI,XhoII
+  1 CACCAGAAAACAGGCATGGAACAGATTCTTTCAnATCTTTAAGAACAAACCAGTCCTGCT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 GACACATAGATTTTTGGACTTTTGGCCTCTGTAACTGTGAGAATAAATTTCTATTTTAAG
+
+
+121 CCATCTACTTTGTAGTAATTTGTTATGGCAGCCCTGAGAAATTA
+                         <<<<<<<<<<<<<<<<<<<<
+
+> Contig73_chr9_29451535_29452248 616 A G 0.448230
+ Eco47III,HaeII
+  1 ACCCAAGAGTCTGAGAGGCCCAGAGGCAGCTGGAGGCTGGAGGAGTCCCaCAGGCAAACC
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 CTCCATTCCATGCGCCCCAGGGAGGCCAGGAAATCAGCnCTCCCAGGAGCAGGGAAGCAG
+
+
+121 CAGTCCCTGGCATTGCCAGGGCAAGTGGCCACTCAGGGGAGAAAGGGGTGAGCTGGGGAG
+
+
+181 GGGGAAGAGGGGAGGGGAGGGAAGGCAGAGACGAAGAGAA
+                       <<<<<<<<<<<<<<<<<<<<
+
+> Contig96_chr9_39008495_39009278 215 A C 0.426539
+ SspI
+  1 TGGCAAACTCCTTGTGAATGCCACTACACTTTCTGGTCTCTGTATGTAATGCTAGATATT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 ACTGACACTTACcGCTACAAAGGCAAGACAAGCAAGACAACTGACATACACCCAgGTATG
+
+
+121 GATCTATGAAGGGAGCTCCTTCTGCTAGAAAACAATATGTAAnTATTTACaTAACACCTA
+
+
+181 CAATTCTAAATGGGTAGTTTCCCACATGTGAGATTACATCTTCAAGAGCCAAAGGACAAT
+                                                  <<<<<<<<<<<<<<
+
+241 TTGTGCATC
+    <<<<<<
+
+> Contig22_chr10_15505382_15505589 172 T C 2.860867
+
+  1 CGCAGGCGCCCCAATTATTCTTAACTCCTTATCAAAAGTTTTCCTAATTGAAACTTAAGC
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 ATCACCTGTTTATTTCCTCTAAAATAAATGTATACATATAGAATTTCAGTAAGATAATGT
+
+
+121 CTCAAAGAAGATGATAGCCATGGGAGAGGCTTATATGTACTTCnTATAATAAACAACGTC
+                                                      <<<<<<<<<<
+
+181 CAGGTGTGATATT
+    <<<<<<<<<<
+
+> Contig69_chr10_40547265_40548153 371 G A 0.137642
+ Bsp1286I,CfoI,HaeII,HhaI
+  1 AAGGGGAAGAACTGAAGCGAGTGAGAAGCACGGAAGGACTTTTAGGTTTACAGCTGGGGT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 CACTGGTCTTCGCTATGGATGCCTCTCTTAAAGGAAAGACTAATTCTCTGTGGGTACTGA
+
+
+121 AGGTGgGAGATGAATGTAGATGGGCnCTCGCATGTGTCAATGCTGACGGCTTGGTGAGAG
+
+
+181 GTTTGGTGCGAGGCCAAAAAGGCgGGGATgAGAGAAGGATGACCTAGGGAGACTGCAGGG
+
+
+241 TATTTAAAAGTTTGGGTCCAATTTTTCTCAAAGTGTGGCCAGTGCAC
+                            <<<<<<<<<<<<<<<<<<<<
+
+> Contig9_chr10_51475063_51476054 770 C T 0.393903
+
+  1 GTCTTCCTTCTAATCCCCaAGCcGTGAGAAGCTGTCTGAGCGCTCCTTGCTGGGCGTCCC
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 TGCATGCCTGTACTGGGGCACACCTACGCCCTGGGTCCTGCTnCTGAAACGGTGTCTCAT
+
+
+121 TTCTGTAATCGCTCCAAGCTTAATGGCTCTCAGCCTTGTGGGTTGCAGTGGAGAGAAAGC
+                                            <<<<<<<<<<<<<<<<<<<<
+
+181 ATT
+
+
+> Contig72_chr11_7142765_7143772 146 G A 1.137400
+
+  1 GGTGTAGTGAGGCTTCCACGAGCAGCCAGGCTTACAAACTCATCCTTAGCCTAAAAACTC
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 CACnAAGTCAAGTATCTTGTGGGTGTTGAAAACTGTTCCACTCTGCAGAGCACCTCTATA
+
+
+121 TGAAGTAATAATCATGGTATAATGTCCTTCTTCACATACCTGCCAAGAT
+                             <<<<<<<<<<<<<<<<<<<<<
+
+> Contig7_chr11_40017076_40017630 352 C T 0.336170
+
+  1 TCGGTCCTTCCTTGATCACATCTCCATGATCCTCCCACTGTTACTTGGAGGAGAATTGTT
+     >>>>>>>>>>>>>>>>>>>>
+
+ 61 GCTTCCACAAATCAGATCTCTTTATTTTTCATTTATTCAACAAATGTGGACTGAGCTCTT
+
+
+121 TGtATAGTACATTCTGTGGGCACTATTCACTAGACACACTGTAAACACTTCTGCTTCCTG
+
+
+181 ACTTTGTTCAGATCTACCCCCnTGCCTGATCTGCCCTCCCCACCTGGTTTTCATCTCAGG
+
+
+241 CTTAGGTCAAGCCTCATtTGCACTTC
+       <<<<<<<<<<<<<<<<<<<<
+
+> Contig16_chr11_53408448_53408790 187 A G 1.366749
+
+  1 ATATTGCCAGTTTTAATGGGTGATATTTAGTCCTCCAATTAGACCTCTTTAGTGCATTGG
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 ATACCAGTGAGCAATCATTCTGACAnAATTTCTGCTGCCTTGATTTTTGTGACAA
+                                    <<<<<<<<<<<<<<<<<<<<
+
+> Contig21_chr12_18403415_18404381 586 G T 0.068025
+
+  1 AGTTCCAATGTCAGAGTCCCTCCCTCTACCTCCTATCCAACCcGCTACTTTTTTTnTTTT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 GTTTTACAACAAAAATAAACCTTCTTGTAACAATTCCAACAATTACAAAATAGAGTAAAA
+
+
+121 TGTTTAAGTCTCTACCTAAACACACTCATCCTCAGAGAAACTCACAGGTAATTTCGGTtC
+
+
+181 GTATCTTCCCAGACCCTCTTCTCAGcTTTCACACATACTACATACACATGAACTTCGAGC
+                                                 <<<<<<<<<<<<<<<
+
+241 TGGCTGTT
+    <<<<<
+
+> Contig41_chr12_25565452_25566993 475 G T 2.230501
+
+  1 TTACATAGCcAAGTGGGAAACAAAGcTACATTTTTnAATATTAATAAATCTGTTTTTTTA
+       >>>>>>>>>>>>>>>>>>>>>
+
+ 61 AAGGGTTaTTATACAATATTATCAAACTTCTTGTGAATGTCAGAATCCAGAACAAACCTA
+
+
+121 AAATCAGTAATACTTGGGAAAGACGCAAATAGTCCCTCTTTCCACT
+                           <<<<<<<<<<<<<<<<<<<<
+
+> Contig5_chr12_53880670_53882675 1221 A C 0.061001
+
+  1 AAGCCATCCATGTGTGTGCTTTCATAATATATTATGACAGGAGATTAAATTCTAAGTAAA
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 GATTAGTCCCCAGTACAGTAGTGTAAAATAGGACTTTTCTCCCTTTTTCTCTCCnCGATA
+
+
+121 TTCCAAATCAGAGTTTGGCCAAAAAgAAGTCAATAAGGACTTAcAAAAAAAAAATCTCCA
+
+
+181 TTCACTGAAAGTAGCTTGCTAGCATTTTCCTTTCTcCTGATGTTGCTCCATAACTTCAAC
+
+
+241 CCTTTTTAAAACTGTCTACTGTGGGGTAGACAGAAGGCGTGGTCGTGAGGTAAAGGTCAA
+                                         <<<<<<<<<<<<<<<<<<<<
+
+> Contig107_chr13_26045881_26046290 341 C G 4.509990
+
+  1 CATAGATTGCCTTTTCCAGTCcAGAAGTTTAGAACAGACTGCCCTGAGATCATGGTGGGA
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 AATATAATACTCATTAGGTTgTTGAAATTCTTGTAGGAATGGAAGAATTTCAGCTTAGGC
+
+
+121 ATTCTGCTnCTGTATTCCCAGATTACAGTGGGAACTGTATGAAA
+                     <<<<<<<<<<<<<<<<<<<<<<<<
+
+> Contig251_chr13_28498333_28501066 864 T G 0.067573
+
+  1 GCCCTCTGGCTTCTGTTTGGGAGGTAGGGCGGGTGGGCAGGAAGGGAGGACGGTCGGGGT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 ATTGGTTCnCCTCCTCCtGCTGGGTCCCAGATGGATACAGGCCAGGTCTG
+                               <<<<<<<<<<<<<<<<<<<<
+
+> Contig55_chr13_53467708_53468101 221 T G 5.717222
+ HinfI
+  1 AATACGGTGAAGAGCAAATGAGAAACATTTCTTCAAACATTTGTAAAGTGAAAATATTTA
+       >>>>>>>>>>>>>>>>>>>>>
+
+ 61 AAATGAAATAGATnCCAAATTTTTTCTTCCAAtGGATTATCTACTGGGTTCTGAATATCA
+                                                 <<<<<<<<<<<<<<<
+
+121 CAAAGACAAATG
+    <<<<<<<<<
+
+> Contig48_chr14_11839435_11843272 3014 A G 0.907583
+
+  1 GTGCTTCCAGTCAAAGGGGAAAACTTGATAGACAAAAGTTTGGATTTTTTTTTTTTTCCT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 TCTCCTTGGGAGTATGTCTGAGTTACCGTTTTTAGTTTTGATCTGTGGAAAAAGTGATTA
+
+
+121 TATAGGTTCCAAATCTTACTTTTCCCTTTTTGTTTTCAATAGACTTTTTGTGATCATTTC
+
+
+181 AnCATAGTTTGTATTATTAAGTAGGGGTTTTtTTTTGTTTTGGTTTTTTTGTGGTTGTGC
+                                                 <<<<<<<<<<<<<<<
+
+241 GTTGTAAG
+    <<<<<
+
+> Contig28_chr14_26905747_26909514 975 G C 0.116622
+ AluI
+  1 CTGGTACGTGCTTCTCCTCCTGCAGCCCACCGTTTACTTGGTAAGTCGCTGCCGATCCGG
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 CGCCCCCGCAATCCCACCCTCGTCGCGAGGACAGACAACCAGGGGCGCGCGGGAGGAGGG
+
+
+121 TGAGACCGCCAGTTCAGCGGAGCAGCGTTCCTAGCGACCGTGTTGGAACAACTTTGGCAA
+
+
+181 nCTGGTCTTTGGATCCCTGCGGGATTTTTCGGGTTTCCCACCCTCATTTCTTGCTT
+                                     <<<<<<<<<<<<<<<<<<<<
+
+> Contig64_chr14_56768376_56768902 473 C T 8.281311
+
+  1 ATAAGAATCTCCTCAGTAGAGAGAAGCCTGATCTACCATGATTTTATTTGAGTAAAACCA
+       >>>>>>>>>>>>>>>>>>>>>>>>>
+
+ 61 TTGAAACAAACAnTTCAAGAAAGATGGTCAGAGAAGCAAAATGTAA
+                        <<<<<<<<<<<<<<<<<<<<<<<
+
+> Contig60_chr15_18493036_18494316 150 G A 0.125024
+
+  1 CGCCTGGAATAGCATGGTGCCTTTAGGAAATTACATCTAACTCTCTAGGGCTGGAAGGAA
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 CACTGAGTnAACGTAAAGAATTGTGGGAGAGAAGCCTTTAGTTAGATCATGCAGGGCtCC
+
+
+121 GTGCTCCAAATGGGCTTTGTGTTTTG
+       <<<<<<<<<<<<<<<<<<<<
+
+> Contig112_chr15_26772864_26773267 374 C T
+
+> Contig119_chr16_6160274_6160477 180 G A
+
+> Contig60_chr16_28079136_28080263 588 T G 5.998983
+ NsiI
+  1 TTAGAGAATTATTCACTCCCCCAAAAGTAATAAAAATATAAGAAACAAAGCATAATCATA
+       >>>>>>>>>>>>>>>>>>>>>>
+
+ 61 ATGCAnTGGTTGAGTTAGTAGTAAATAACATTTTAGGGTCATAAATTAAAAACTGAATTG
+
+
+121 AGATTTAGCTGGAAATTGTGATATAAATGTCAGGATAAGAGAAGCAAGATTGAAAGAAAG
+
+
+181 ATGGATTAAAAATGCTAAATCCTTCTCTACTATTACAGGAAATTGATAAAAGAAGAGAGA
+                                                     <<<<<<<<<<<
+
+241 GGAAACAGCACATAT
+    <<<<<<<<<<<<
+
+> Contig31_chr17_12128267_12129637 205 G A 0.246305
+
+  1 TGGAGGCAATGGAGGTGAATGAGCCCCAGTCCTGGACCTCgAAGCAGACTGGCCAGAGAC
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 ACCAGGATTTAAGGCATGTGATGAAGACACAGTTCAAAGTGACGAGCCCTGCAGACTCTT
+
+
+121 CnGGAGCAGAGgTAGAGTGATGACCCGTACCTGGAAGGTTTTAGGAAGGATAACAATGAA
+                                                               <
+
+181 TTTACCAGAAGGCAGGGGTAGA
+    <<<<<<<<<<<<<<<<<<<
+
+> Contig99_chr17_26021506_26022200 505 C T 0.171977
+ RsaI
+  1 TGTTGCCATGTTGCCAGTATGTTTTTTTAAGTTTTCCTTTTTAATTTCATTTATGATATT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 TTTTGGAGTAnaGAAGTTATCATTTCACATGATCAACTTTTCAGTCTTTTTCTTTATAAT
+
+
+121 TTTTAAcTTTGTTGTCATGTTTAGAAAGGTTAAATTTATACCTTGTAAAATAcCTTCgCA
+
+
+181 AATTTACATTTGGGAAATTATTAGTAGTATTATTTcAGGAAGTTaTTATTTTTAAGTGTT
+                                                     <<<<<<<<<<<
+
+241 GGGTTCCCGTGA
+    <<<<<<<<<
+
+> Contig27_chr17_61713766_61716585 1056 G C 2.199527
+ Eco47III,HaeII
+  1 AAGAGGCGCAGGAAGGAGAGTCCGCCcGCCGCAGCCCGCCCGCCGGCTCCTCAGACAGCn
+       >>>>>>>>>>>>>>>>>>
+
+ 61 CTCGCaGGTCCTCCAGCCTTCCAGCGAGAAGAAAGAAAGAGCGTCACCGGAAACCACCGA
+                                                          <<<<<<
+
+121 AACTCTGGGGTAGAGCG
+    <<<<<<<<<<<<<<
+
+> Contig229_chr18_3706523_3708577 1076 A G 0.444778
+
+  1 TTTAAACTCCCGTGTCTGTGCTTGATTATGGCACCGTTAcTCTCGGACGTATTTAATTTT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 CTGATTCTGATTCATTGGTCTATTACATGAGCAATTGGTGGnAAGTGATGTCTGTCTGTG
+
+
+121 GCCCTTACATTATTTATAATAAAACTCTCCTTCAAAGAACCTTTGGACGATGTCTCCACA
+
+
+181 ATTACAGAATGAGTACAAATTAGTTTTCTAAAACAGCAACTGGTGGTTAATTAAGTTTTG
+
+
+241 TCATGTTTTCTGGAGATGAGTGTCTCATGGTTTGGATACTATGAAGGCATTTCTGCAAGG
+                                            <<<<<<<<<<<<<<<<<<<<
+
+301 TT
+
+
+> Contig82_chr18_27305489_27306229 566 C T 0.348750
+
+  1 CCTGAGAACTTCAAGCTCAGCGGAGGGCTGAAAGGGAGGTAACCACTTTTGTACTAAATT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 GTCACCTCCTTGCTTATTTTCGTGAAGTTCTAAAGAACACAACTATCTCACTAACACAAc
+
+
+121 AGATTTATTATTGAGTTGTCAGAATCAGCAGCTTTTAGTCACngGTCACTTGTGTGCCTC
+                                                        <<<<<<<<
+
+181 CACTCCATCATAACT
+    <<<<<<<<<<<<
+
+> Contig64_chr18_55979770_55980315 49 G A 2.123800
+
+  1 CCCCAAGGAGACAGGAGGGCAGGCTGTGTGGGTTTCCTGGCCCGCAAnCCCTGTGCAGGT
+       >>>>>>>>>>>>>>>>>>>
+
+ 61 GCGgTTCTGCCAGGCCCGCAAATCTCGGTCTCACTTAACTGCGGCATCATTTATGCTAAT
+                                          <<<<<<<<<<<<<<<<<<<<
+
+121 G
+
+
+> Contig146_chr19_5221790_5223013 143 A G 0.869806
+
+  1 TTAGAATGGCTTTTTCACGGAAGGAGATGAGTTATAAAGTACGGgTGACATTTTTTTGTC
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 TTGnGTTTTTTTTTTTTGTCTTGTTTTTAACTGTTGTTTAAGTCAGCCAACAAGTACATA
+
+
+121 ATTTCTCAGCCCACATTTAAAAATTATCAACTCATTTTCACTTGGAGGTGTGGACATAAA
+
+
+181 GCCATAAATATAATTTGCATTCTGCTGACCTGTTTC
+                 <<<<<<<<<<<<<<<<<<<<
+
+> Contig129_chr19_25541958_25542221 202 T C 2.550968
+ HinfI
+  1 AAGAATCAAGCATGCATTCTGCCTTCCCCATGTGAAAAGTACCAGGTGAGGATATGTACC
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 TCTTTATATCCATGTTCCAAGAACAACAACAACAACAAAAGAATGAGAGTnACCACTTTA
+
+
+121 CAACCCCCAAAGAATTAATGGATT
+    <<<<<<<<<<<<<<<<<<<<<
+
+> Contig60_chr19_54013816_54014398 281 A G 1.271267
+
+  1 ATTTCTCTCGCCGATATTGAGGTTAAGTATCCCTCTAGGCTAAAAGACCAGCAGCTTTTC
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 TTAAACCTATTACAGGAATCCCAATAATGGAAAGAAACGAGGGGAGGCAGTGCTCATGTC
+
+
+121 ACATTCTTCCAGAAATCAAATATAnTGGGTTTTTTTGTTGACGTAAATACATAGGTTGGA
+
+
+181 AAAAAGGTAGGGGGAAAGGAAAA
+    <<<<<<<<<<<<<<<<<<<<
+
+> Contig50_chr20_12138509_12141975 3206 C A 0.383804
+
+  1 TCACACCAGGCTCAAGGTTAAGGCAGAACACAAGATAAGAGAGCAAGCTGGCTTCCTGTC
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 CCCCAGCTGGCtTnCCCATGGGAGCAGAAGCTGGATGGGTGCAGCTGCTGGCTAGGGATC
+
+
+121 CTGTAAAAACTGAAGACCTCCaGTCTCCAGGGCTGGAGGaGGGATTCCTGCCCTGGGGGC
+
+
+181 AGGCCaGATGAGAGGGATGCGATAATGGCAGGTGTCTCCACAAGA
+                          <<<<<<<<<<<<<<<<<<<<
+
+> Contig36_chr20_32631363_32632049 176 G A 1.149790
+
+  1 CTGCCCGAAACAAGTTCCTCATTGTTTCCTnCGTTCTGTGCTGTGGCGGTTTCTTCCTGG
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 ACCCAGAGTCCTTTTCcGAACATTAGCAACTCCATTATGCCACACAGAGATGAGATTTGA
+
+
+121 GAAAGGAAAATAAAGTTGTCTCGTGATATGGAGGGCAAAGCTGATAG
+                            <<<<<<<<<<<<<<<<<<<<
+
+> Contig50_chr21_4178523_4178687 121 G A 0.483377
+
+  1 GTAGGAATCTCAAGCCCCAATCTACTTTTCAGGAAGCTGAGGCTCAGAGAAGTAAAGTAA
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 CATGCTCAAATTCACACCAGTAAGTGAGAGAGTTnTAAGTAACTATAGTAAGTGACAGAG
+                                                            <<<<
+
+121 CTGGGATTTGAACCCTCAT
+    <<<<<<<<<<<<<<<<
+
+> Contig129_chr21_31045749_31046924 381 A G 0.028026
+ AcyI,Hsp92I
+  1 CAGCTGAAGCACCCTCTCTGACCAAACCTGATCTTTCTTTTGGGGATCCTTGACnTCTCA
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 TAAGTCTTTATGAACCATTTGTCCTTCCAGCCATCATTTCCTTCAAC
+                            <<<<<<<<<<<<<<<<<<<<
+
+> Contig159_chr22_7896450_7896974 109 G C 0.465232
+
+  1 TAACTGAGTGATAGTGCTTGGcGCAAGACACTAGCAAnCCTGTACTCACCTTCCATTCAT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 TTATGTCATAATAATAATTCTTTAAATATGGAAAGcGTAgAAACAAAATAGGAACACTGC
+
+
+121 TAAGTATTCATTTAGGTAATAAGTTTAGTGCTAGATGTGTGACAGGAATTATTTTcATTA
+
+
+181 ACCACAAGCAAACATTTATGGAATGTCCATTGCATGCTGAAATGTA
+                           <<<<<<<<<<<<<<<<<<<<
+
+> Contig23_chr22_34612023_34612568 167 C G 0.409430
+
+  1 TATTCTACCACTCAAAGCCAGCCTGAAGGAAnCCTGGGcTCTTTCCATCAGCTATCTGAC
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 AAGTTGATCTAAAcgTGTAGAAAGCATGCCTGGCTCCACACCTGATTTCATGTGGAGCCA
+
+
+121 TCAGCTCTCACACGATCACCTTG
+    <<<<<<<<<<<<<<<<<<<<
+
+> Contig26_chr22_57817664_57819633 1453 A G 0.471213
+ RsaI
+  1 TGCcCACCCACATCAcTGAACAATTCAGAGAAGATTCCTTTAACATATGCATTCAATGTT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 TAAGCCTCGCTAACATTTTTTAAGCACCGAACCTTTTTAAAAAGGGCTCTAAAAAATAAG
+
+
+121 CATGAAACTAAATCTCTCTAATACgTCACGTGACACACATGTAtATAACCCAGAAGGTnC
+
+
+181 ATCTAGGGAAACGCAAAAGGAATTATG
+        <<<<<<<<<<<<<<<<<<<<
+
+> Contig133_chr23_3525134_3526502 1223 A G 1.358849
+
+  1 TCCTATTTTGTCCCCAAGTCCCAGGTTCAGGAGCTCCATTAAGTCACAGGTAATTCAGCT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 GAGAGCCTGCAAAATGGCAACCCCACCTGAGGCACTTTCTTTAAATCAACTGTATCAAGG
+
+
+121 TAACATTTACACAGAATAAnAAGCACTCATTTTAAAGAAATAGCTTGATGAGTTTAGTCT
+
+
+181 AATTGTATCTGTGTAGCCACcACACAGTCAAGATa
+               <<<<<<<<<<<<<<<<<<<<<
+
+> Contig35_chr23_28447813_28449115 70 T A 0.163155
+ DdeI
+  1 CCTTTTCTCTCCATTCACACCCCATCCTTCTTnGTCCCTCCAAAACTCCTAGCTGTTTCC
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 CATTTTAGGGTCTCTGCATTTGCTGTTCCAAGCAAGCTCTGCCCCCAAATGATCTGGTGG
+                                                    <<<<<<<<<<<<
+
+121 CTTGTTCCCTC
+    <<<<<<<<
+
+> Contig50_chr24_22515247_22516072 761 C T 0.190253
+
+  1 GGGGAGACCCTGATCCATCCTCATTCTACTGCTTCCCGATGTCCCAGGCCTGCtGTTCTA
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 CACGAAAGCCCATTCngTGCCTCCAAGTAGGGAGCAGAAGGGAAGAACACA
+                                <<<<<<<<<<<<<<<<<<<<
+
+> Contig84_chr24_29196623_29199644 466 C T 0.214603
+
+  1 TAATTGGACaCTTTTGACTTGCGTTTCATGATTTTGCCCCATTTTTCTCTGCnGCAATTT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 GGCCAGTGATTCCTGTCTTTCCCTCTATTATCCACTCTGATTAACTCAGCTGCACCTGCC
+                                                         <<<<<<<
+
+121 AGCCTTTATTCCTGCA
+    <<<<<<<<<<<<<
+
+> Contig144_chr25_4011170_4013134 541 A G 0.086768
+
+  1 GGTGAGGtGGAGAGTGGCAAGAGCTGTTGGTGGGCGTGTGTGAGCCAGAGGGCAAGCGGG
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 GAGCTCCTAACTGCAAcATCCAGGGGCAGTCGATACTGCCTGGGAAGTAGGAACTGCTCT
+
+
+121 GGAGCATGAGTGGAATTAGCAGATGGATAACAAGGGAGnGCGAcaAGGGCATTTTATGAA
+
+
+181 GATGGAACACCTTGGAAAAGATCAGATTGCTGAAGCATCCGTTTGAGAAAGCACAGATAA
+
+
+241 CTTTTCAAATCTGAAGAGGAGGGACATGACGGGGAGATGAGACTAG
+                           <<<<<<<<<<<<<<<<<<<<
+
+> Contig103_chr25_38891221_38892140 407 G A 0.166581
+ Alw44I,Bsp1286I,CfoI,HhaI
+  1 GTGGGATGCAGGTGCTGTGTCTACCCACTTCTTCCGGGGACCAGCCCCTCTCTGGCCACA
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 CCCACTTCCTCTCATCTTAACTGTCCAAATTTGCTGACTCAAAGGGATGTGTGTGCGTAT
+
+
+121 GTGTGTGTGTGCnCACATGTGCATGCATGTGTTTTGTGTCTTTCACTCTCAAAATTATTT
+
+
+181 AAGTTCCCATGGCCCTGCCCTGATTTATCTCCCAAAT
+                  <<<<<<<<<<<<<<<<<<<<
+
+> Contig204_chr26_4311195_4311778 170 C T 0.085422
+
+  1 AACAGAAGCCTGTCCCAGCTACAGGAGGGAAACGGGCTCGGCAgCgTGGCACTGCCTCAC
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 tGTCACCCCCAGGAGCCCGGGAAGCCGTCCCTTGAnTCCTCAGTGACGGTGACCATGACC
+
+
+121 AAGGGCAGTAACTCTGCCCGCGGGACACAGCgCTCCTGCTCCCgACgGAAGGTGTGCCGG
+
+
+181 CCACAGAGCGCACGTTGgGGCCgAGTTCAGGGGCAGAtAGGAAGACACAGGa
+                                 <<<<<<<<<<<<<<<<<<<<
+
+> Contig146_chr26_26622638_26623906 574 G A 0.318381
+
+  1 TTTCTGAGATCACACAGCCAGGAAATGGGGAGCCAAGATTTGAACCCAAGCCTGTCTGAT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 TCTGGAACCTGCACCAGAaCCACACCTCAgCCCTGCCTTCCCTTGGAAGGCTtACcnTTG
+
+
+121 TGCCTGGAACATAGTAAGTGCTCAAAAAATGGTcTAAATCATCATCGTGTaTTAGGAAGC
+
+
+181 CTGGGTCCACACCCCTTGGGCTGTGGAGTGTCTTGAG
+                  <<<<<<<<<<<<<<<<<<<<
+
+> Contig135_chr27_6853874_6854079 158 C T 0.060201
+
+  1 AAAGGGTTCCAAGTTACGGGATTcATACGGGAAGGCTCCcGAAATAGAAATGATCGTTGT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 AACATGGGGAGATTTGTCAGGGACAgACAnGAACTGTCTTATAAAATGCAGCCCAGTTTT
+                                                   <<<<<<<<<<<<<
+
+121 CTTcTTGAGA
+    <<<<<<<
+
+> Contig64_chr27_34654435_34654621 132 C A 0.296658
+ RsaI
+  1 AAATTGGTCAGTGACTGGGAACACGTTCCGAACCAGCTCCGTGGATTTACAAGTTTTCCA
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 GTAGAAACGGTCCTCCAAGCGTnCCTGAAGTGCTcATTCATTACCGCAAGGTG
+                                  <<<<<<<<<<<<<<<<<<<<
+
+> Contig131_chr28_6481806_6483783 138 C T 0.387007
+
+  1 AGaCCCTCGAAATTCTCCAGTTGTCAAATTCTTCCCCAGTnTCTGCTTGAgAGATTTTCT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 CCTAGCTTCAGAGCCTTAACTACAGAATACTGAGTCTTTGCTCAAGCAGCGGCTCAACAC
+
+
+121 ATAACCCCTAAGCTGCCAAGGCTTTTCTCCCCCAAGACTTTGTTTCCTTCCACGAAACCT
+                                           <<<<<<<<<<<<<<<<<<<<
+
+181 TC
+
+
+> Contig60_chr28_30197166_30197364 92 T C 1.139483
+
+  1 AATCAGAAAGTCCCAGAGGTGGAGACTACAGCAAATTACCTGACATTTGTCTTTGATGCT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 nTATGTAAAAACTCTGGGTGGCAGGAAAGCACTTAAATTTACCTTGTAGAGCTTTGCTAC
+
+
+121 CCAATAGAACATTCTGTGCTGATGGGAATG
+           <<<<<<<<<<<<<<<<<<<<
+
+> Contig29_chr29_4726399_4727143 559 A T 3.113735
+
+  1 CTTTTTGTGGCCAAAAgTGACAACATAATTTTCAAAATGGGAAACGATGATTACAAATGA
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 GTGGAACACATGTTACAGTGGCAAGATGTGTGAGCAATGCTGATTCAGGGTATAATGGGT
+
+
+121 TGGTTGTAAAACAAATATGAGTTTCTAATATTCGGGCATATTAAACAATCTAAGTTnTAC
+
+
+181 AAAATCTCTCTTGTACTATTTATTGGGTAACTACTAGTAAAGGAAAGGCCTAATAGGCTG
+                                                            <<<<
+
+241 TTCCCATAAAAAGAAGCTAC
+    <<<<<<<<<<<<<<<<<
+
+> Contig1_chr30_5992217_5993068 106 C T 1.078937
+
+  1 CTTGAGACAGCCATGGTGTTTGTTTCTACCTTTCCTCTAAGAAGACACCTGTATACAGAT
+       >>>>>>>>>>>>>>>>>>>>>
+
+ 61 ATTCCnTGTGACTCACACTCATCCTCATAGACATCCCCAGTATCATTTCTGTGAAGCCTT
+                                                           <<<<<
+
+121 CCTTGACATTTTCCAACA
+    <<<<<<<<<<<<<<<
+
+> Contig165_chr30_25804389_25804926 190 T C 0.328844
+
+  1 CCGCTTGTCCCGCTCTGTGATTTAGATGTTTCACGAGCGGGAAGGTGGGGGGATTGATTC
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 TCTnATTCGCGCTTCTCCGCCCAGGCTGCGCATTAGAATCACTTGGGGAGCTTTAAAACA
+                                               <<<<<<<<<<<<<<<<<
+
+121 TGCCAG
+    <<<
+
+> Contig38_chr31_5164423_5166573 2074 C T
+
+> Contig17_chr31_26433828_26434459 498 T C 4.814134
+
+  1 CCATGCAATCTCATGCAATGGTTAAAAGCAATGAATTTGTTGTACTCAAAATACCTTGGT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 TGGGTATTAAAAAGTTTTnAGTAAACATAATGAAAAAAATAGAAGTAGATAAGATCAACA
+                                                      <<<<<<<<<<
+
+121 CACCTACACGAATTA
+    <<<<<<<<<<<<
+
+> Contig9_chr32_19479532_19479735 12 A G
+
+> Contig30_chr32_25902721_25905783 208 C G 0.322381
+ AluI,HindIII
+  1 TTCACAGTGTTCTCCCAAGGCACAAATAGAATGCTCAGTCATTGGTTATTTTACTTAGAT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 ATTCCTTTCTAAATATAGACTTACCTGTTATTTTTTCCTATACTAATAACATTCAAATTA
+
+
+121 TCTATGTGTACAATAATAAACACTAGGCATAACTGTATCTCAGTACCAATTTCCTTAGAA
+
+
+181 GGTAAAnCTTATTTCAGTCAAGGTCTAGGCCAAGCATTGA
+                     <<<<<<<<<<<<<<<<<<<<
+
+> Contig18_chr33_22207246_22209159 1363 G T 2.559961
+
+  1 ACGACTGCCCTTTTTCCCTCTGTCTCTATTTCTCCTACACACACACACACACACACACAC
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 ACACAgAGTGAGCTTTTAGCAACCTTGTTTAACATTTGGAAAGGAATAGCTGACACAACA
+
+
+121 GAGGGGGGnATAAGTAAATACATTGCATGGCTGTATATAATTGAACATTCTTCAAATTCT
+
+
+181 TTAAACAGAAATTTCAGTACCATGGAGATCCTTGAAAT
+                 <<<<<<<<<<<<<<<<<<<<<<
+
+> Contig170_chr33_26189421_26189940 292 T C 0.307330
+
+  1 TTCAAGTTCCCCTTTTGTGCCTTCACACACTTGTnTTTATGGTCTCTATTTAAAAAAAAg
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 AGACAAAAACCTCTTAATAAATTTAGGAAGTAGTCCTCACTCTTTAAAGGAATTGTGCTT
+
+
+121 AAAGCAGCAGCTCTTCCTCACTCCTTG
+        <<<<<<<<<<<<<<<<<<<<
+
+> Contig113_chr34_13341080_13341643 236 C T 0.412222
+ Hsp92II,NcoI,StyI
+  1 AAATGCTCATTTCCCAACATTCAGTGAAATGCCTGATGACTAATCCTTGCTCCaTGGATC
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 CTGGGGGTCCCGTGCACAGAAGAGGGTGAGGTCTCTGCCAnGGACTGTGTCCCTGGAAAT
+                                                  <<<<<<<<<<<<<<
+
+121 GACAGGGCA
+    <<<<<<
+
+> Contig152_chr34_31794848_31795540 242 G A 2.779642
+
+  1 ATAGGAAATAAAACCCCAGCTCTCAGAGnAAAGCAAAATACTTTTAAAAAGATGAAAAAG
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 CATCAGAGCTATGAGACACAGAAGATCTAGAGTATAATTGTGTTTTTGTATAGAAGGGAG
+
+
+121 AGAAGGAATGCTGCAGGAGCCACATTTCTCCATCTA
+                 <<<<<<<<<<<<<<<<<<<<
+
+> Contig47_chr35_3666773_3667898 348 G T 0.234571
+
+  1 GCTGCCTCAGCAGTTATCTTGGGTTCTGTTAACTTTGACACACCTTTCACGAAGAAATTC
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 TTCATTGCAGTGCTTGAACAATCTGATTGTTCAATCTGATTnGATTCTATTTCTTGCTGA
+
+
+121 GATAATGTTCTAGCACCTTCTCTGTGGATCCCCTTAT
+                  <<<<<<<<<<<<<<<<<<<<
+
+> Contig74_chr35_25394343_25394813 303 A T 4.297720
+
+  1 AGTTTCCCCAAATGTTCATGATTAACCAGGTAAACTGAAGATTAACCTTAAATATATATT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 TCTTTGAGTCATTATAATTAAATTAACTAGGTTGTTTTCAAATATACTAATAATAGAAAC
+
+
+121 TGAAAAAATAATCCAAGTAATATaTCTGAATTGAAAAAAAAAGTAAGGCCATTGTATAAA
+
+
+181 ACAACTGAAAGTTTTTGGAnAAGGTACTATTTTTAATTTACAGTGCATTTTTTTAATCGG
+
+
+241 CATTTCAAATAATAACTTCAATCaCACACACAAAAATAAACCAAATCAACTGCATGTAAG
+                                             <<<<<<<<<<<<<<<<<<<
+
+301 GGaAGT
+    <<<
+
+> Contig5_chr36_4562983_4563634 343 C T 1.168507
+
+  1 ATATGAATGGTGGTGATGGATTCAGCATCTTGACTCTTTTTCAACTATGTCAAGATTTGC
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 ACTGGATCTTGTCTAAAGTCACTCTTCTAGGGGAAGTCAAAGAGACTGGGTCaGTCCtCA
+
+
+121 AGATAcGATGTAAGCAGGTAAGATAGCACTATAGTAGGTCTTCTTGTCATGGTGAGTCAA
+
+
+181 TAACCATTCAATATTCTTTCnACCTACTCTTTACCTGCTCAATCAAGGTAGGGGTC
+                                     <<<<<<<<<<<<<<<<<<<<
+
+> Contig133_chr36_32954045_32955409 136 A G 3.772017
+ TaqI
+  1 ATTAAATGAAAACAGTGTCAGGCAATAAGATGTATTAAGTACAGTATGCCTGAGGATATA
+       >>>>>>>>>>>>>>>>>>>>>>
+
+ 61 ATATTAAACACAGATTCTGCTGTTACTATCnAAGTGGATATTAAAATAACAGTGCTACTT
+
+
+121 TGAGGGTAATGCTACTTTGGAGAATATTTTCTAATAAGCTCACCaTAAAATGACggATAA
+                                         <<<<<<<<<<<<<<<<<<<<
+
+> Contig53_chr37_6665763_6665919 116 C T 10.874746
+ BstOI
+  1 AGTCCTCATGTTGTACTTTACCTCACCTGAATTTACTCATCtGATAGTTGGAAATTTGTA
+       >>>>>>>>>>>>>>>>>>>>>>>>
+
+ 61 TCCATTGCCCATCtTCACCACCCCATGTCnCTGGAAACCAACAAtCTGTTCTCTGTATGa
+                                       <<<<<<<<<<<<<<<<<<<<<<<<<
+
+121 CTT
+
+
+> Contig2_chr37_31197993_31198256 182 C T 0.594606
+
+  1 CTCTCACCACATGGAGAATCCTGTATGTTCAGCTGTATGACGTGGGGGGAACGTCAGAGC
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 TCAGTTTCATAGCAGTCAGCTCCATGTTATGGGTTCAAgAnGAAAACAGGTGGCAGGCtT
+
+
+121 GCCACAGCCTCCCTCAGGGGTGgCCTTGACAGATAAAcGT
+                     <<<<<<<<<<<<<<<<<<<<
+
+> Contig7_chr38_12217200_12218387 1163 A T
+
+> Contig265_chrX_2689247_2689484 114 C G 9.232233
+
+  1 CTTAGAGAATTCCCTGATTCACTGAGTTAAATTATTACCAAATCTGATAATAATAAAAGA
+       >>>>>>>>>>>>>>>>>>>>>>>
+
+ 61 AGTAATTACAGATCAATAATTAATCTATATGTCTGAATACATTTTAATAAGTCCnAcTCA
+
+
+121 ACAATATGCTGACAAAACAATACATCTTGTCT
+          <<<<<<<<<<<<<<<<<<<<<<<
+
+> Contig113_chrX_26287829_26288398 385 C T 0.077485
+
+  1 AAAGCCGTAACAGTCGCTAGGAGAATCATAATTTTAAGCTTTGTGTGTCCCGGGcTTGAG
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 TCCCTCAGGAGTAGTTAGATGCGGCCTTAAATTCTCcCAGTAAATTCACnTTGACGGCCT
+
+
+121 ATTTTTGACCTGGGGGCACACGCTGCTATACACTCTAGCCACCTCTGATCCTCTGGCCTC
+
+
+181 CTCTGTTACAATGACAGAAACGACAGAAGCATTTCTTTAAAATAAGTCCCAGTACGTGCA
+
+
+241 CACAAACGTTCAGGGCAGCCTTCTCCATAAACGGCACGAAATGGC
+                          <<<<<<<<<<<<<<<<<<<<
+
+> Contig90_chrX_57430715_57431566 548 C T 0.153995
+ EcoRV
+  1 CTCATTCCCAGCTACCTCCACCTCTATACCAACCCCTAGTTCCTGTACATCCCTGCTTCT
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 ATAGGAAATCTTCCTGGTGTTGATATnATTCCCAAGGTCAGGCTGTCCTCCTAGCTCCCT
+
+
+121 CTCCTCATCTGCATCAAGTCCTCCAAACTGGGCAGTAGAC
+                     <<<<<<<<<<<<<<<<<<<<
+
+> Contig133_chrX_84833782_84834125 182 G A 0.277794
+
+  1 CACCAGAGTGCAATCGAGAACCATCTGATCACAGAACCATAGAAAAGATTGCTGTACAAG
+       >>>>>>>>>>>>>>>>>>>>
+
+ 61 ACTTAGGAACTCATTCTGTTCAGGATGGAGAAGCTGATGCCCAAAAAGGGAAAGGAACTT
+
+
+121 AACCAAAGTCCATACAnTATCAACTCTACACATAAAGGAAGGGAGTGGAGGGAGCAGTAA
+
+
+181 GACCAGAGATATAGACCCCAGTGAGGAGGCTGTGAGCTCCTG
+                       <<<<<<<<<<<<<<<<<<<<
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/find_intervals/find_intervals.interval	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,1 @@
+chr2	9817960	67331624	1272.2000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/map_ensembl_transcripts/map_ensembl_transcripts.tabular	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,150 @@
+ENSCAFT00000000001	476153	cfa00230=Purine metabolism.cfa00500=Starch and sucrose metabolism.cfa00740=Riboflavin metabolism.cfa00760=Nicotinate and nicotinamide metabolism.cfa00770=Pantothenate and CoA biosynthesis.cfa01100=Metabolic pathways
+ENSCAFT00000000144	483960	N
+ENSCAFT00000000160	610160	N
+ENSCAFT00000000215	U	N
+ENSCAFT00000000233	483973	N
+ENSCAFT00000000365	474414	cfa00450=Selenocompound metabolism.cfa00970=Aminoacyl-tRNA biosynthesis
+ENSCAFT00000000507	484023	N
+ENSCAFT00000000517	476233	N
+ENSCAFT00000000674	611986	N
+ENSCAFT00000000724	609478	N
+ENSCAFT00000000760	U	N
+ENSCAFT00000000762	U	N
+ENSCAFT00000001047	475067	cfa00240=Pyrimidine metabolism.cfa00410=beta-Alanine metabolism.cfa00770=Pantothenate and CoA biosynthesis.cfa00983=Drug metabolism - other enzymes.cfa01100=Metabolic pathways
+ENSCAFT00000001052	U	N
+ENSCAFT00000001063	481999	N
+ENSCAFT00000001076	U	N
+ENSCAFT00000001104	607591	N
+ENSCAFT00000001141	484064	N
+ENSCAFT00000001146	475076	N
+ENSCAFT00000001204	481203	N
+ENSCAFT00000001219	474465	N
+ENSCAFT00000001250	481729.481731	cfa04145=Phagosome.cfa04514=Cell adhesion molecules (CAMs).cfa04612=Antigen processing and presentation.cfa04672=Intestinal immune network for IgA production.cfa04940=Type I diabetes mellitus.cfa05140=Leishmaniasis.cfa05145=Toxoplasmosis.cfa05150=Staphylococcus aureus infection.cfa05152=Tuberculosis.cfa05164=Influenza A.cfa05166=HTLV-I infection.cfa05168=Herpes simplex infection.cfa05310=Asthma.cfa05320=Autoimmune thyroid disease.cfa05322=Systemic lupus erythematosus.cfa05323=Rheumatoid arthritis.cfa05330=Allograft rejection.cfa05332=Graft-versus-host disease.cfa05416=Viral myocarditis
+ENSCAFT00000001352	482026	cfa00565=Ether lipid metabolism
+ENSCAFT00000001363	475084	cfa03022=Basal transcription factors
+ENSCAFT00000001421	484096	N
+ENSCAFT00000001523	475088	N
+ENSCAFT00000001575	481744	cfa04141=Protein processing in endoplasmic reticulum
+ENSCAFT00000001587	482035	N
+ENSCAFT00000001597	609411	N
+ENSCAFT00000002056	610014	N
+ENSCAFT00000002100	U	N
+ENSCAFT00000002110	481249	N
+ENSCAFT00000002175	476310	N
+ENSCAFT00000002259	484151	N
+ENSCAFT00000002460	481785	N
+ENSCAFT00000002537	U	N
+ENSCAFT00000002577	484157	N
+ENSCAFT00000002578	608906	N
+ENSCAFT00000002660	U	N
+ENSCAFT00000002792	474523	N
+ENSCAFT00000002849	475216	N
+ENSCAFT00000002999	U	N
+ENSCAFT00000003163	474921	cfa03040=Spliceosome
+ENSCAFT00000003223	474925	N
+ENSCAFT00000003307	609995	N
+ENSCAFT00000003515	482316	N
+ENSCAFT00000003560	U	N
+ENSCAFT00000003644	484216	cfa00970=Aminoacyl-tRNA biosynthesis
+ENSCAFT00000003824	475249	N
+ENSCAFT00000003840	482333	N
+ENSCAFT00000004092	474960	N
+ENSCAFT00000004103	484298	N
+ENSCAFT00000004208	481637	N
+ENSCAFT00000004253	100534006.100534007.474588	N
+ENSCAFT00000004311	482346	N
+ENSCAFT00000004464	481892	N
+ENSCAFT00000004511	481893	N
+ENSCAFT00000004609	611755	N
+ENSCAFT00000004673	611817	N
+ENSCAFT00000004726	610047	cfa00230=Purine metabolism.cfa00240=Pyrimidine metabolism.cfa01100=Metabolic pathways.cfa03030=DNA replication.cfa03410=Base excision repair.cfa03420=Nucleotide excision repair.cfa03430=Mismatch repair.cfa03440=Homologous recombination.cfa05166=HTLV-I infection
+ENSCAFT00000004799	U	N
+ENSCAFT00000004933	482382	cfa04621=NOD-like receptor signaling pathway.cfa05133=Pertussis
+ENSCAFT00000004993	474995	cfa03008=Ribosome biogenesis in eukaryotes
+ENSCAFT00000005126	U	N
+ENSCAFT00000005142	606804	N
+ENSCAFT00000005225	475647	N
+ENSCAFT00000005323	U	N
+ENSCAFT00000005467	U	N
+ENSCAFT00000005496	481925	N
+ENSCAFT00000005518	492302	cfa02010=ABC transporters.cfa04971=Gastric acid secretion.cfa04972=Pancreatic secretion.cfa04976=Bile secretion
+ENSCAFT00000005653	403417	cfa04145=Phagosome.cfa04620=Toll-like receptor signaling pathway.cfa05132=Salmonella infection.cfa05133=Pertussis.cfa05134=Legionellosis.cfa05140=Leishmaniasis.cfa05142=Chagas disease (American trypanosomiasis).cfa05144=Malaria.cfa05145=Toxoplasmosis.cfa05146=Amoebiasis.cfa05152=Tuberculosis.cfa05162=Measles.cfa05164=Influenza A.cfa05323=Rheumatoid arthritis
+ENSCAFT00000005746	476410	cfa00071=Fatty acid metabolism.cfa03320=PPAR signaling pathway.cfa04920=Adipocytokine signaling pathway
+ENSCAFT00000005749	610007	N
+ENSCAFT00000005832	403584	cfa04060=Cytokine-cytokine receptor interaction.cfa04630=Jak-STAT signaling pathway.cfa04672=Intestinal immune network for IgA production.cfa05166=HTLV-I infection.cfa05168=Herpes simplex infection.cfa05323=Rheumatoid arthritis
+ENSCAFT00000005972	475012	N
+ENSCAFT00000006025	482980	N
+ENSCAFT00000006114	483829	N
+ENSCAFT00000006157	475021	N
+ENSCAFT00000006219	483261	cfa04972=Pancreatic secretion.cfa04978=Mineral absorption
+ENSCAFT00000006272	484394	cfa00280=Valine, leucine and isoleucine degradation.cfa00290=Valine, leucine and isoleucine biosynthesis.cfa00770=Pantothenate and CoA biosynthesis.cfa01100=Metabolic pathways
+ENSCAFT00000006453	475893	N
+ENSCAFT00000006479	U	N
+ENSCAFT00000006507	484622	cfa03030=DNA replication.cfa04110=Cell cycle
+ENSCAFT00000006669	476094	N
+ENSCAFT00000006689	475897	N
+ENSCAFT00000006827	U	N
+ENSCAFT00000006891	610021	N
+ENSCAFT00000007130	485445	cfa04020=Calcium signaling pathway.cfa04080=Neuroactive ligand-receptor interaction
+ENSCAFT00000007145	607961	N
+ENSCAFT00000007244	476781	N
+ENSCAFT00000007375	403767	cfa04977=Vitamin digestion and absorption
+ENSCAFT00000007440	482516	N
+ENSCAFT00000007467	485576	N
+ENSCAFT00000007484	609336	N
+ENSCAFT00000007527	607108	N
+ENSCAFT00000007553	487123	cfa03450=Non-homologous end-joining.cfa05340=Primary immunodeficiency
+ENSCAFT00000007697	475382	N
+ENSCAFT00000007703	477019	cfa03430=Mismatch repair.cfa03460=Fanconi anemia pathway.cfa05200=Pathways in cancer.cfa05210=Colorectal cancer.cfa05213=Endometrial cancer
+ENSCAFT00000007747	U	N
+ENSCAFT00000007774	477021	cfa04510=Focal adhesion.cfa04512=ECM-receptor interaction.cfa04514=Cell adhesion molecules (CAMs).cfa04810=Regulation of actin cytoskeleton.cfa05410=Hypertrophic cardiomyopathy (HCM).cfa05412=Arrhythmogenic right ventricular cardiomyopathy (ARVC).cfa05414=Dilated cardiomyopathy
+ENSCAFT00000007776	U	N
+ENSCAFT00000007779	478007.478008	cfa03060=Protein export.cfa04141=Protein processing in endoplasmic reticulum.cfa04145=Phagosome
+ENSCAFT00000007859	483010	N
+ENSCAFT00000007951	U	N
+ENSCAFT00000007959	482810.611087	N
+ENSCAFT00000008012	485173	N
+ENSCAFT00000008063	484489	N
+ENSCAFT00000008142	476128	N
+ENSCAFT00000008198	612489	N
+ENSCAFT00000008413	U	N
+ENSCAFT00000008540	483021	N
+ENSCAFT00000008586	484499	N
+ENSCAFT00000008588	U	N
+ENSCAFT00000008673	478018	N
+ENSCAFT00000008678	485188	N
+ENSCAFT00000008728	U	N
+ENSCAFT00000008769	485523	cfa02010=ABC transporters.cfa04976=Bile secretion
+ENSCAFT00000008831	475398	N
+ENSCAFT00000009074	485769	cfa04330=Notch signaling pathway
+ENSCAFT00000009114	483354	N
+ENSCAFT00000009614	475416	N
+ENSCAFT00000009698	486001	N
+ENSCAFT00000009710	486002	N
+ENSCAFT00000010094	486223	cfa00230=Purine metabolism.cfa00240=Pyrimidine metabolism.cfa01100=Metabolic pathways.cfa03030=DNA replication.cfa03410=Base excision repair.cfa03420=Nucleotide excision repair.cfa05166=HTLV-I infection
+ENSCAFT00000010141	482857	cfa04360=Axon guidance
+ENSCAFT00000010439	610992	N
+ENSCAFT00000010496	415126	cfa04380=Osteoclast differentiation.cfa04916=Melanogenesis.cfa05200=Pathways in cancer.cfa05218=Melanoma
+ENSCAFT00000010516	U	N
+ENSCAFT00000010531	484693	N
+ENSCAFT00000010559	483405	N
+ENSCAFT00000010593	U	N
+ENSCAFT00000010616	474176	cfa03450=Non-homologous end-joining.cfa04110=Cell cycle
+ENSCAFT00000010630	486770	N
+ENSCAFT00000010829	486944	N
+ENSCAFT00000010865	U	N
+ENSCAFT00000010931	485368	N
+ENSCAFT00000010977	U	N
+ENSCAFT00000010988	482891	cfa04145=Phagosome
+ENSCAFT00000011187	475441	N
+ENSCAFT00000011380	U	N
+ENSCAFT00000011397	475750	cfa04110=Cell cycle.cfa04114=Oocyte meiosis.cfa04120=Ubiquitin mediated proteolysis.cfa04914=Progesterone-mediated oocyte maturation.cfa05166=HTLV-I infection
+ENSCAFT00000011721	475621	N
+ENSCAFT00000011730	486534	N
+ENSCAFT00000011771	477193	N
+ENSCAFT00000011789	609978	N
+ENSCAFT00000011968	488881	cfa00760=Nicotinate and nicotinamide metabolism.cfa04146=Peroxisome
+ENSCAFT00000012081	478082	cfa04621=NOD-like receptor signaling pathway
+ENSCAFT00000012133	611998	N
+ENSCAFT00000012159	484609	N
+ENSCAFT00000012254	U	N
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/modify_snp_table/modify.gd_snp	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,338 @@
+Contig161_chr1_4641264_4641879	115	C	T	73.5	chr1	4641382	C	6	0	2	45	8	0	2	51	15	0	2	72	5	0	2	42	6	0	2	45	10	0	2	57	Y	54	0.323	0
+Contig20_chr1_21313469_21313570	66	C	T	54.0	chr1	21313534	C	4	0	2	39	4	0	2	39	5	0	2	42	4	0	2	39	4	0	2	39	5	0	2	42	N	1	+99.	0
+Contig86_chr1_30984450_30985684	670	C	T	365.0	chr1	30985133	C	9	0	2	54	10	0	2	57	13	0	2	66	3	0	2	36	9	0	2	54	7	0	2	48	Y	145	0.031	0
+Contig5_chr1_32562160_32563940	1215	G	T	163.0	chr1	32563356	G	17	0	2	78	19	0	2	84	20	0	2	87	14	0	2	69	12	0	2	63	10	0	2	57	Y	17	0.251	0
+Contig110_chr1_33385093_33386888	510	C	T	270.0	chr1	33385587	A	14	0	2	69	11	0	2	60	19	0	2	84	11	0	2	60	10	0	2	57	13	0	2	66	Y	13	0.126	0
+Contig100_chr1_33562920_33564288	743	C	T	178.0	chr1	33563655	C	6	0	2	45	10	0	2	57	8	0	2	51	5	0	2	42	13	0	2	66	7	0	2	48	Y	13	0.090	3
+Contig7_chr1_37302355_37302489	97	A	G	59.2	chr1	37302452	G	3	0	2	36	8	0	2	51	5	0	2	42	8	0	2	51	7	0	2	48	6	0	2	45	N	56	2.812	0
+Contig62_chr1_41880715_41882180	1078	T	G	57.6	chr1	41881785	T	14	0	2	69	15	0	2	72	16	0	2	75	13	0	2	66	8	0	2	51	10	0	2	57	Y	21	0.477	0
+Contig47_chr1_48409178_48409384	37	C	T	134.0	chr1	48409215	T	5	0	2	42	6	0	2	45	8	0	2	51	9	0	2	54	4	0	2	39	6	0	2	45	N	66	+99.	0
+Contig119_chr1_49647683_49650077	1618	C	A	99.7	chr1	49649276	A	8	0	2	51	11	0	2	60	10	0	2	57	9	0	2	54	10	0	2	57	14	0	2	69	Y	16	0.166	0
+Contig21_chr1_60697952_60699446	307	G	A	51.9	chr1	60698265	G	12	0	2	63	9	0	2	54	4	0	2	39	6	0	2	45	9	0	2	54	4	0	2	39	Y	98	0.507	0
+Contig131_chr1_62319542_62320564	169	C	G	103.0	chr1	62319709	C	12	0	2	63	12	0	2	66	14	0	2	69	12	0	2	63	9	0	2	54	9	0	2	54	Y	73	0.307	1
+Contig14_chr1_63450425_63450680	101	T	A	102.0	chr1	63450530	T	8	0	2	51	10	0	2	57	18	0	2	81	8	0	2	51	8	0	2	34	8	0	2	51	N	99	1.085	0
+Contig83_chr1_63869778_63869942	40	T	C	23.7	chr1	63869819	C	5	0	2	42	7	0	2	48	2	0	2	33	4	0	2	39	6	0	2	48	4	0	2	39	N	654	1.364	0
+Contig30_chr1_64702572_64703138	178	A	T	117.0	chr1	64702750	T	10	0	2	57	10	0	2	57	20	0	2	87	21	0	2	90	6	0	2	45	12	0	2	63	Y	50	3.872	0
+Contig101_chr1_69868406_69868872	287	G	A	14.6	chr1	69868689	G	13	0	2	66	17	0	2	78	10	0	2	57	8	0	2	51	7	0	2	48	8	0	2	51	N	137	0.305	0
+Contig35_chr1_74482577_74482791	170	G	A	45.4	chr1	74482751	A	3	0	2	36	4	0	2	39	13	0	2	66	2	0	2	33	5	0	2	42	2	0	2	33	N	20	+99.	3
+Contig49_chr1_83865731_83865944	85	G	A	34.1	chr1	-1	N	4	0	2	39	4	0	2	39	8	0	2	51	2	0	2	33	5	0	2	42	4	0	2	39	N	-1	1.485	0
+Contig129_chr1_117547123_117548666	926	G	A	126.0	chr1	117548059	G	19	0	2	84	9	0	2	54	11	0	2	60	10	0	2	57	12	0	2	63	11	0	2	60	Y	64	0.049	0
+Contig7_chr1_125154638_125154844	190	G	T	130.0	chr1	125154818	A	5	0	2	42	4	0	2	39	7	0	2	48	2	0	2	33	7	0	2	48	4	0	2	39	N	33	+99.	0
+Contig222_chr2_9817738_9818143	220	C	T	888.0	chr2	9817960	C	17	0	2	78	12	0	2	63	20	0	2	87	8	0	2	51	11	0	2	60	12	0	2	63	Y	76	0.093	1
+Contig47_chr2_25470778_25471576	126	G	A	888.0	chr2	25470896	G	12	0	2	63	14	0	2	69	14	0	2	69	10	0	2	57	18	0	2	81	13	0	2	66	N	11	0.289	1
+Contig10_chr2_40859744_40860534	637	G	A	888.0	chr2	40860397	A	3	0	2	36	3	0	2	36	2	0	2	33	7	0	2	48	6	0	2	45	8	0	2	51	Y	42	1.435	0
+Contig52_chr2_41421981_41422725	604	C	A	888.0	chr2	41422583	A	17	0	2	78	18	0	2	81	14	0	2	69	17	0	2	78	12	0	2	63	14	0	2	69	Y	44	0.882	0
+Contig94_chr2_43869105_43870358	220	G	A	888.0	chr2	43869333	G	12	0	2	63	18	0	2	81	11	0	2	60	15	0	2	72	12	0	2	63	13	0	2	66	Y	1	0.156	0
+Contig34_chr2_48444129_48444939	695	C	T	134.0	chr2	48444828	C	14	0	2	69	8	0	2	51	16	0	2	75	17	0	2	78	9	0	2	54	15	0	2	72	Y	161	0.375	0
+Contig6_chr2_56859179_56859956	671	T	C	999.9	chr2	56859851	T	15	0	2	72	18	0	2	81	20	0	2	90	19	0	2	84	19	0	2	84	24	0	2	99	N	28	5.308	1
+Contig115_chr2_61631913_61632510	310	G	T	999.3	chr2	61632216	G	7	0	2	48	9	0	2	54	7	0	2	48	11	0	2	60	10	0	2	57	10	0	2	57	N	13	0.184	0
+Contig31_chr2_67331584_67331785	39	C	T	999.0	chr2	67331623	C	11	0	2	60	10	0	2	57	7	0	2	48	9	0	2	54	2	0	2	33	4	0	2	39	N	110	0.647	1
+Contig92_chr2_75906683_75907774	773	T	C	85.4	chr2	75907438	C	12	0	2	63	12	0	2	63	17	0	2	78	8	0	2	51	8	0	2	51	13	0	2	66	Y	93	0.166	0
+Contig163_chr2_76402959_76404830	221	C	T	127.0	chr2	76403181	C	4	0	2	42	10	0	2	57	9	0	2	54	11	0	2	60	7	0	2	48	9	0	2	54	Y	54	0.178	1
+Contig59_chr2_85243022_85243758	506	G	A	96.3	chr2	85243509	T	9	0	2	54	11	0	2	60	12	0	2	63	14	0	2	69	10	0	2	57	7	0	2	48	Y	6	0.459	0
+Contig56_chr3_17326225_17327548	387	G	C	91.2	chr3	17326591	G	14	0	2	69	13	0	2	66	15	0	2	72	15	0	2	72	13	0	2	66	12	0	2	63	Y	20	0.225	3
+Contig108_chr3_46210055_46210874	367	A	G	21.0	chr3	46210423	A	19	0	2	84	10	0	2	57	16	0	2	75	14	0	2	69	20	0	2	87	11	0	2	60	N	236	0.028	1
+Contig16_chr3_47113407_47114449	322	G	A	105.0	chr3	47113713	G	13	0	2	66	17	0	2	78	15	0	2	72	6	0	2	45	11	0	2	60	11	0	2	60	Y	114	0.132	5
+Contig3_chr3_47564810_47565251	262	T	G	112.0	chr3	47565104	T	14	0	2	69	16	0	2	75	20	0	2	87	10	0	2	57	9	0	2	54	8	0	2	51	Y	24	0.073	1
+Contig35_chr3_49662401_49662929	270	A	T	96.1	chr3	49662652	A	14	0	2	69	11	0	2	60	23	0	2	96	13	0	2	66	12	0	2	63	11	0	2	60	Y	36	3.583	2
+Contig97_chr3_49820354_49821631	1069	G	A	44.1	chr3	49821402	G	9	0	2	54	9	0	2	54	6	0	2	45	10	0	2	57	5	0	2	42	8	0	2	51	N	6	0.201	2
+Contig25_chr3_53260697_53262560	402	G	A	211.0	chr3	53261095	G	17	0	2	78	14	0	2	69	15	0	2	75	12	0	2	63	14	0	2	69	12	0	2	63	Y	116	1.033	0
+Contig11_chr3_53992739_53995954	2392	G	A	82.4	chr3	53995143	A	12	0	2	66	11	0	2	60	14	0	2	69	6	0	2	45	11	0	2	60	17	0	2	78	Y	358	0.321	1
+Contig236_chr3_72676275_72676473	128	G	A	278.0	chr3	72676410	G	12	0	2	63	11	0	2	60	13	0	2	66	10	0	2	57	11	0	2	60	8	0	2	51	N	36	0.496	1
+Contig48_chr3_74792236_74792388	63	T	C	111.0	chr3	74792289	-	17	0	2	78	9	0	2	54	9	0	2	54	5	0	2	42	11	0	2	60	9	0	2	54	N	-1	3.528	0
+Contig65_chr3_80727952_80728283	39	T	C	71.2	chr3	80727990	T	7	0	2	48	3	0	2	36	8	0	2	51	6	0	2	45	8	0	2	51	11	0	2	60	N	22	7.078	0
+Contig53_chr3_86407941_86409349	1406	G	A	86.9	chr3	86409317	A	5	0	2	42	5	0	2	42	4	0	2	39	10	0	2	57	8	0	2	51	12	0	2	63	N	14	3.285	1
+Contig13_chr3_92409738_92412300	718	A	G	23.3	chr3	92410450	A	12	0	2	63	16	0	2	75	18	0	2	81	13	0	2	66	22	0	2	93	7	0	2	48	Y	23	0.224	2
+Contig134_chr4_12145648_12148225	1326	C	T	164.0	chr4	12146961	C	9	0	2	54	8	0	2	51	7	0	2	48	3	0	2	36	5	0	2	42	5	0	2	42	Y	4	0.080	1
+Contig88_chr4_15557471_15557833	268	A	G	145.0	chr4	15557737	A	6	0	2	45	6	0	2	45	11	0	2	60	9	0	2	54	5	0	2	42	6	0	2	45	Y	46	4.138	0
+Contig53_chr4_18823968_18824478	149	A	G	91.3	chr4	18824115	A	18	0	2	81	15	0	2	72	21	0	2	90	13	0	2	66	9	0	2	54	12	0	2	63	N	51	0.251	0
+Contig19_chr4_26233601_26233991	146	G	C	51.6	chr4	26233744	G	10	0	2	57	8	0	2	51	9	0	2	54	5	0	2	42	9	0	2	54	4	0	2	39	N	41	0.163	3
+Contig78_chr4_28579975_28580134	30	T	G	19.6	chr4	28579994	-	4	0	2	39	3	0	2	36	5	0	2	42	4	0	2	39	2	0	2	33	2	0	2	33	N	33	0.499	0
+Contig16_chr4_30177226_30179725	621	C	T	88.4	chr4	30177859	C	20	0	2	87	13	0	2	66	13	0	2	66	11	0	2	60	8	0	2	51	8	0	2	51	Y	45	0.797	1
+Contig30_chr4_46196500_46197672	1045	A	C	33.4	chr4	46197522	C	16	0	2	75	9	0	2	54	4	0	2	39	7	0	2	48	14	0	2	69	6	0	2	45	Y	43	0.306	0
+Contig2_chr4_47039007_47039323	158	G	C	35.1	chr4	47039160	-	8	0	2	51	9	0	2	54	13	0	2	66	8	0	2	51	10	0	2	60	9	0	2	54	N	0	0.131	0
+Contig17_chr4_61310346_61311158	267	C	T	49.9	chr4	61310604	T	10	0	2	57	7	0	2	48	9	0	2	54	10	0	2	57	14	0	2	69	7	0	2	48	Y	219	0.098	0
+Contig26_chr4_64190783_64191295	64	A	G	162.0	chr4	64190843	A	10	0	2	57	6	0	2	45	20	0	2	87	12	0	2	63	17	0	2	78	7	0	2	48	Y	306	7.428	0
+Contig11_chr4_65500960_65501654	634	T	C	107.0	chr4	65501585	T	13	0	2	66	14	0	2	69	13	0	2	66	13	0	2	66	6	0	2	45	18	0	2	81	Y	10	6.849	0
+Contig38_chr4_67768488_67768982	113	A	G	102.0	chr4	67768598	A	9	0	2	54	8	0	2	51	9	0	2	54	11	0	2	60	10	0	2	57	7	0	2	48	Y	188	3.175	0
+Contig30_chr4_70978564_70979580	596	A	G	164.0	chr4	70979151	A	15	0	2	72	12	0	2	63	20	0	2	87	14	0	2	69	15	0	2	72	15	0	2	72	Y	111	2.458	2
+Contig72_chr4_74225793_74226492	674	A	G	110.0	chr4	74226472	A	5	0	2	42	3	0	2	36	2	0	2	33	3	0	2	36	7	0	2	48	4	0	2	39	Y	115	+99.	1
+Contig32_chr4_75618955_75620254	301	T	C	333.0	chr4	75619257	C	10	0	2	57	8	0	2	51	12	0	2	63	20	0	2	87	12	0	2	63	14	0	2	69	Y	34	0.163	2
+Contig31_chr5_4734956_4736547	1166	C	T	133.0	chr5	4736132	C	14	0	2	69	8	0	2	51	17	0	2	78	4	0	2	39	9	0	2	54	12	0	2	63	Y	1	0.021	0
+Contig30_chr5_15698241_15699076	396	G	T	76.6	chr5	15698633	T	8	0	2	51	9	0	2	54	10	0	2	57	7	0	2	48	11	0	2	60	8	0	2	54	Y	65	0.009	0
+Contig36_chr5_17709244_17710004	373	T	C	281.0	chr5	17709624	T	6	0	2	45	9	0	2	54	7	0	2	48	4	0	2	39	10	0	2	57	4	0	2	39	Y	16	0.131	0
+Contig13_chr5_21881138_21881562	227	A	G	251.0	chr5	21881356	A	11	0	2	60	20	0	2	87	22	0	2	93	10	0	2	57	10	0	2	57	21	0	2	90	Y	182	2.013	0
+Contig5_chr5_23188121_23190168	1841	C	T	141.0	chr5	23189975	C	20	0	2	87	19	0	2	84	22	0	2	93	16	0	2	75	18	0	2	81	14	0	2	69	N	45	0.355	0
+Contig6_chr5_26899813_26900498	97	A	C	88.6	chr5	26899910	A	15	0	2	72	14	0	2	69	27	0	2	108	15	0	2	72	13	0	2	69	12	0	2	63	Y	92	7.370	3
+Contig314_chr5_34019166_34019319	72	C	A	20.1	chr5	-1	N	6	0	2	45	9	0	2	54	4	0	2	39	4	0	2	39	9	0	2	54	5	0	2	42	N	-1	+99.	4
+Contig147_chr5_38980258_38980559	221	C	T	40.8	chr5	38980477	C	15	0	2	72	15	0	2	72	19	0	2	84	10	0	2	57	12	0	2	63	20	0	2	87	Y	11	4.576	0
+Contig115_chr5_48119079_48120169	151	C	T	78.3	chr5	48119234	C	17	0	2	78	10	0	2	57	14	0	2	69	16	0	2	75	8	0	2	51	12	0	2	63	Y	205	0.320	0
+Contig45_chr5_50892738_50892968	169	C	A	25.8	chr5	50892911	C	10	0	2	57	7	0	2	48	10	0	2	60	6	0	2	45	6	0	2	45	13	0	2	66	N	244	0.497	1
+Contig40_chr5_51484164_51484696	14	A	G	53.3	chr5	51484180	A	6	0	2	45	4	0	2	39	4	0	2	39	3	0	2	36	0	0	2	13	3	0	2	36	N	63	+99.	1
+Contig40_chr5_51664286_51667573	861	C	T	148.0	chr5	51665149	C	20	0	2	87	21	0	2	90	20	0	2	87	11	0	2	60	16	0	2	75	15	0	2	72	Y	207	0.080	1
+Contig15_chr5_51889708_51891244	882	A	G	149.0	chr5	51890581	G	13	0	2	66	18	0	2	81	17	0	2	78	22	0	2	93	15	0	2	72	22	0	2	93	Y	7	0.025	1
+Contig143_chr5_57231364_57232010	294	T	C	78.5	chr5	57231644	T	3	0	2	36	5	0	2	42	4	0	2	39	2	0	2	33	10	0	2	57	6	0	2	45	Y	73	0.337	2
+Contig13_chr5_57609985_57610584	496	C	T	50.5	chr5	57610476	C	17	0	2	78	9	0	2	54	6	0	2	45	8	0	2	51	10	0	2	57	12	0	2	63	N	77	2.022	1
+Contig230_chr5_58486998_58487280	227	T	C	192.0	chr5	58487232	T	3	0	2	36	4	0	2	39	9	0	2	54	6	0	2	45	4	0	2	39	7	0	2	48	N	24	0.100	2
+Contig32_chr5_70852360_70853289	282	G	A	114.0	chr5	70852623	G	16	0	2	75	11	0	2	60	13	0	2	66	12	0	2	63	13	0	2	66	7	0	2	48	Y	33	0.276	0
+Contig100_chr5_71189678_71190590	813	C	T	30.8	chr5	71190523	C	11	0	2	60	11	0	2	60	9	0	2	54	10	0	2	57	6	0	2	45	13	0	2	66	Y	8	0.362	1
+Contig45_chr5_76133561_76134403	388	A	G	103.0	chr5	76133941	G	3	0	2	36	8	0	2	51	8	0	2	51	5	0	2	42	6	0	2	45	7	0	2	48	Y	57	0.038	0
+Contig61_chr5_90202541_90204393	909	C	T	101.0	chr5	90203461	T	7	0	2	48	5	0	2	42	14	0	2	69	3	0	2	36	5	0	2	42	8	0	2	51	Y	64	1.448	0
+Contig111_chr6_5821219_5822519	1060	A	G	68.1	chr6	5822321	T	7	0	2	48	6	0	2	45	11	0	2	60	9	0	2	54	3	0	2	36	12	0	2	63	Y	7	0.231	1
+Contig220_chr6_10671338_10672441	999	T	C	36.3	chr6	10672322	T	5	0	2	42	11	0	2	60	11	0	2	60	8	0	2	51	5	0	2	42	9	0	2	54	Y	1	1.667	0
+Contig226_chr6_17361986_17362884	418	G	C	251.0	chr6	17362406	G	6	0	2	45	8	0	2	51	7	0	2	48	9	0	2	54	7	0	2	48	7	0	2	48	Y	7	0.147	0
+Contig51_chr6_20231207_20231785	161	A	G	70.5	chr6	20231375	G	13	0	2	66	5	0	2	42	8	0	2	51	2	0	2	36	5	0	2	42	5	0	2	42	Y	153	1.754	0
+Contig102_chr6_30271329_30271577	39	T	G	139.0	chr6	30271371	G	3	0	2	36	4	0	2	39	6	0	2	45	1	0	2	30	4	0	2	39	4	0	2	39	N	15	1.159	0
+Contig217_chr6_31393824_31394218	97	G	A	115.0	chr6	31393921	G	9	0	2	54	19	0	2	84	15	0	2	72	12	0	2	63	7	0	2	48	10	0	2	57	N	45	0.477	0
+Contig186_chr6_31928098_31928245	73	G	A	117.0	chr6	-1	N	5	0	2	42	8	0	2	51	2	0	2	33	4	0	2	39	1	0	2	30	5	0	2	42	N	-1	0.276	1
+Contig52_chr6_33188498_33188724	123	G	A	59.0	chr6	-1	N	5	0	2	42	13	0	2	66	8	0	2	51	4	0	2	39	9	0	2	54	9	0	2	54	N	-1	0.880	1
+Contig102_chr6_38743009_38743435	290	A	G	178.0	chr6	38743311	A	11	0	2	60	13	0	2	66	9	0	2	54	11	0	2	60	12	0	2	63	13	0	2	66	Y	34	0.148	4
+Contig81_chr6_49018353_49019532	179	C	A	72.5	chr6	49018530	A	15	0	2	72	13	0	2	66	19	0	2	72	8	0	2	51	12	0	2	63	16	0	2	75	Y	15	0.145	1
+Contig112_chr6_51024554_51024851	100	A	G	121.0	chr6	51024654	A	10	0	2	57	12	0	2	63	9	0	2	54	13	0	2	66	14	0	2	69	17	0	2	78	N	75	4.287	0
+Contig40_chr6_51412751_51413807	227	T	C	94.5	chr6	51412975	C	5	0	2	42	8	0	2	51	7	0	2	48	9	0	2	54	11	0	2	60	10	0	2	57	Y	4	5.661	0
+Contig47_chr6_69073222_69074767	1315	T	C	212.0	chr6	69074558	T	20	0	2	87	17	0	2	78	18	0	2	81	12	0	2	63	17	0	2	78	7	0	2	48	Y	9	0.652	0
+Contig30_chr6_74848932_74849059	57	C	G	46.3	chr6	74848993	C	7	0	2	48	7	0	2	33	6	0	2	45	7	0	2	48	5	0	2	42	6	0	2	45	N	-1	+99.	1
+Contig84_chr7_6648683_6650255	1297	G	A	110.0	chr7	6649988	G	18	0	2	81	9	0	2	54	22	0	2	77	16	0	2	75	20	0	2	87	6	0	2	45	Y	83	0.166	0
+Contig239_chr7_13007379_13007700	275	A	G	39.8	chr7	13007642	A	8	0	2	51	5	0	2	42	8	0	2	51	3	0	2	36	3	0	2	36	5	0	2	42	N	46	1.511	3
+Contig119_chr7_18310707_18310948	23	A	T	133.0	chr7	18310729	A	6	0	2	45	5	0	2	42	10	0	2	57	5	0	2	42	2	0	2	33	2	0	2	33	N	4553	+99.	0
+Contig93_chr7_18513377_18513741	173	T	C	130.0	chr7	18513533	C	15	0	2	72	11	0	2	60	18	0	2	81	6	0	2	45	10	0	2	57	14	0	2	69	Y	115	0.174	0
+Contig133_chr7_19603333_19603776	414	C	G	31.9	chr7	19603734	G	10	0	2	57	4	0	2	39	4	0	2	39	5	0	2	42	9	0	2	54	9	0	2	54	N	78	+99.	5
+Contig132_chr7_20426224_20428145	1815	A	G	28.3	chr7	20428041	A	11	1	2	43	12	0	2	63	19	0	2	84	23	0	2	96	14	0	2	69	10	0	2	57	N	11	0.264	0
+Contig206_chr7_26281823_26282074	103	C	A	101.0	chr7	26281925	T	11	0	2	60	16	0	2	61	19	0	2	84	6	0	2	45	19	0	2	84	16	0	2	75	N	-1	0.947	1
+Contig55_chr7_53147505_53148974	894	A	G	68.4	chr7	53148397	G	22	0	2	93	13	0	2	66	16	0	2	75	8	0	2	51	16	0	2	75	11	0	2	60	Y	19	0.060	0
+Contig4_chr7_53685534_53688206	1709	C	G	76.2	chr7	53687225	C	18	0	2	81	17	0	2	78	18	0	2	81	15	0	2	72	14	0	2	69	14	0	2	69	Y	32	0.659	1
+Contig61_chr7_55832923_55834065	506	T	C	185.0	chr7	55833450	C	9	0	2	54	10	0	2	57	22	0	2	93	12	0	2	63	12	0	2	63	7	0	2	48	Y	1	0.019	0
+Contig91_chr8_12804505_12805470	409	C	A	111.0	chr8	12804906	C	8	0	2	51	10	0	2	57	15	0	2	72	12	0	2	63	14	0	2	69	15	0	2	72	N	145	0.175	0
+Contig8_chr8_27811135_27812620	333	C	T	37.9	chr8	27811458	C	4	0	2	39	11	0	2	60	18	0	2	81	5	0	2	42	6	0	2	45	5	0	2	42	Y	1	0.272	0
+Contig66_chr8_28273102_28273660	175	G	C	81.6	chr8	28273263	T	9	0	2	54	17	0	2	78	19	0	2	84	8	0	2	51	16	0	2	75	19	0	2	84	Y	3	2.735	0
+Contig84_chr8_31375511_31376456	443	T	C	125.0	chr8	31375954	T	10	0	2	57	15	0	2	72	27	0	2	108	18	0	2	81	16	0	2	75	9	0	2	54	Y	2	0.650	0
+Contig18_chr8_32575859_32577431	264	T	C	151.0	chr8	32576124	T	20	0	2	87	14	0	2	69	17	0	2	78	14	0	2	69	13	0	2	66	14	0	2	69	Y	17	0.915	1
+Contig54_chr8_40913908_40916451	1275	G	A	175.0	chr8	40915190	G	10	0	2	57	8	0	2	51	11	0	2	60	7	0	2	48	8	0	2	51	9	0	2	54	Y	21	0.056	3
+Contig93_chr8_44658786_44659075	180	T	G	55.3	chr8	44658964	T	4	0	2	39	3	0	2	36	6	0	2	45	5	0	2	45	5	0	2	42	4	0	2	39	N	14	0.188	0
+Contig66_chr8_58562376_58563446	345	C	G	5.74	chr8	58562721	C	14	0	2	69	12	0	2	63	9	0	2	57	10	0	2	57	9	0	2	54	10	0	2	57	Y	6	0.685	0
+Contig44_chr8_71186368_71188207	1455	G	T	147.0	chr8	71187818	G	4	10	1	74	3	0	2	36	20	0	2	87	12	0	2	63	8	0	2	51	10	0	2	57	Y	88	0.036	0
+Contig73_chr9_29451535_29452248	616	A	G	24.7	chr9	29452127	G	4	0	2	39	7	0	2	48	1	0	2	30	4	0	2	39	7	0	2	48	6	0	2	45	N	49	0.448	4
+Contig96_chr9_39008495_39009278	215	A	C	98.7	chr9	39008708	C	7	0	2	48	13	0	2	66	28	0	2	111	16	0	2	75	17	0	2	78	17	0	2	78	Y	8	0.427	1
+Contig69_chr10_40547265_40548153	371	G	A	58.1	chr10	40547649	A	9	0	2	54	8	0	2	51	8	0	2	51	9	0	2	54	4	0	2	39	5	0	2	42	Y	20	0.138	4
+Contig63_chr10_42716594_42719945	1018	A	G	88.7	chr10	42717616	G	13	0	2	66	14	0	2	69	13	0	2	66	12	0	2	63	18	0	2	81	5	0	2	42	Y	25	1.740	0
+Contig22_chr10_43255307_43255570	81	C	A	37.2	chr10	43255383	C	15	0	2	72	18	0	2	81	22	0	2	93	16	0	2	75	11	0	2	60	12	0	2	63	N	62	0.450	0
+Contig9_chr10_51475063_51476054	770	C	T	57.3	chr10	51475839	C	6	0	2	45	16	0	2	75	16	0	2	75	13	0	2	66	9	0	2	54	9	2	2	21	N	80	0.394	0
+Contig42_chr10_53816543_53818392	1642	G	A	27.5	chr10	53818172	A	7	0	2	48	13	0	2	66	17	0	2	78	14	0	2	69	19	0	2	84	16	0	2	75	N	1	0.433	0
+Contig36_chr10_53992615_53993741	229	G	C	86.2	chr10	53992846	G	17	0	2	78	14	0	2	69	13	0	2	66	15	0	2	72	12	0	2	63	15	0	2	72	N	23	1.912	0
+Contig20_chr10_58141129_58141750	575	C	T	46.1	chr10	58141701	C	7	0	2	48	8	0	2	51	9	0	2	54	3	0	2	36	4	0	2	39	9	0	2	54	N	1	4.264	0
+Contig26_chr10_59510973_59511899	146	C	A	29.0	chr10	59511126	C	8	0	2	51	13	0	2	66	18	0	2	81	13	0	2	66	10	0	2	57	7	0	2	48	Y	208	1.077	0
+Contig72_chr11_7142765_7143772	146	G	A	152.0	chr11	7142911	A	8	0	2	51	8	0	2	51	24	0	2	99	10	0	2	57	17	0	2	78	11	0	2	60	Y	90	1.137	0
+Contig9_chr11_9904571_9905983	1284	C	T	151.0	chr11	9905857	C	16	0	2	75	19	0	2	84	17	0	2	78	16	0	2	75	12	0	2	63	13	1	2	44	Y	11	0.422	1
+Contig7_chr11_40017076_40017630	352	C	T	46.3	chr11	40017422	C	7	0	2	48	9	0	2	54	6	0	2	45	8	0	2	51	16	0	2	75	9	0	2	54	Y	44	0.336	0
+Contig108_chr11_42953408_42955156	367	A	G	89.4	chr11	42953779	A	17	0	2	78	11	0	2	60	14	0	2	69	20	0	2	87	14	0	2	69	17	0	2	78	Y	118	0.784	1
+Contig16_chr11_53408448_53408790	187	A	G	153.0	chr11	53408638	A	7	0	2	48	9	0	2	54	18	0	2	81	10	0	2	57	11	0	2	60	12	0	2	63	Y	116	1.367	0
+Contig21_chr12_18403415_18404381	586	G	T	34.5	chr12	18403983	-	13	0	2	66	16	0	2	75	25	0	2	102	12	0	2	63	12	0	2	63	14	0	2	69	Y	12	0.068	0
+Contig33_chr12_19804073_19804529	178	T	C	69.4	chr12	19804261	T	13	0	2	66	13	0	2	66	22	0	2	93	11	0	2	60	12	0	2	63	18	0	2	81	Y	11	1.571	0
+Contig41_chr12_25565452_25566993	475	G	T	6.29	chr12	25565926	G	15	0	2	72	14	0	2	69	10	0	2	57	15	0	2	72	18	0	2	81	19	0	2	84	N	10	2.231	1
+Contig9_chr12_27204351_27204696	239	A	G	145.0	chr12	27204587	A	7	0	2	48	8	0	2	51	12	0	2	63	8	0	2	51	11	0	2	60	11	0	2	60	Y	14	0.046	0
+Contig45_chr12_30548282_30550498	448	C	T	124.0	chr12	30548703	-	9	0	2	54	11	0	2	60	22	0	2	93	19	0	2	84	12	0	2	63	12	0	2	63	Y	66	0.305	0
+Contig46_chr12_35571846_35572563	58	G	C	83.2	chr12	35571906	G	4	0	2	39	10	0	2	57	11	0	2	60	6	0	2	45	10	0	2	57	6	0	2	45	Y	55	+99.	1
+Contig28_chr12_42075871_42076044	136	G	A	134.0	chr12	42076006	A	6	0	2	45	5	0	2	42	7	0	2	48	7	0	2	48	2	0	2	33	4	0	2	39	N	3	9.479	0
+Contig16_chr12_42386141_42387454	194	A	G	161.0	chr12	42386323	A	11	0	2	60	8	0	2	54	23	0	2	96	17	0	2	78	6	0	2	45	13	0	2	66	Y	7	0.927	1
+Contig42_chr12_44424628_44425829	255	A	G	84.4	chr12	44424879	A	12	0	2	63	19	0	2	84	23	0	2	96	15	0	2	72	18	0	2	81	14	0	2	69	Y	18	1.190	2
+Contig10_chr12_44447953_44449698	63	C	T	105.0	chr12	44448020	C	11	0	2	60	9	0	2	54	12	0	2	63	10	0	2	57	15	0	2	72	8	0	2	51	Y	31	11.791	0
+Contig5_chr12_53880670_53882675	1221	A	C	99.4	chr12	53881888	A	16	0	2	75	18	0	2	81	23	0	2	96	10	0	2	57	15	0	2	72	17	0	2	78	Y	31	0.061	0
+Contig86_chr12_56715356_56716464	818	T	C	166.0	chr12	56716164	T	20	0	2	87	16	0	2	75	16	0	2	75	14	0	2	69	13	0	2	66	7	0	2	48	Y	22	1.092	0
+Contig3_chr12_65021967_65024097	238	T	G	92.6	chr12	65022205	T	17	0	2	78	14	0	2	69	16	0	2	75	9	0	2	54	13	0	2	66	15	0	2	72	Y	258	0.117	0
+Contig43_chr12_66499742_66500010	121	G	T	41.5	chr12	66499866	G	12	0	2	63	4	0	2	39	8	0	2	51	6	0	2	45	10	0	2	57	6	0	2	45	N	42	0.421	0
+Contig14_chr12_71364692_71365311	20	A	C	103.0	chr12	71364712	A	7	0	2	48	3	0	2	36	5	0	2	42	1	0	2	30	2	0	2	33	3	0	2	36	Y	35	+99.	0
+Contig37_chr13_15910164_15910426	245	G	A	32.9	chr13	-1	N	3	4	1	41	4	0	2	39	3	0	2	36	4	0	2	39	3	0	2	36	10	0	2	57	N	-1	2.159	1
+Contig107_chr13_26045881_26046290	341	C	G	81.4	chr13	26046230	C	16	0	2	75	20	0	2	90	14	0	2	69	15	0	2	72	9	0	2	54	9	0	2	54	Y	51	4.510	0
+Contig251_chr13_28498333_28501066	864	T	G	296.0	chr13	28499180	T	3	0	2	36	5	0	2	42	4	0	2	39	2	0	2	33	5	0	2	42	6	0	2	45	Y	9	0.068	0
+Contig154_chr13_36777857_36778736	356	G	A	95.5	chr13	36778225	A	6	0	2	45	11	0	2	60	11	0	2	60	9	0	2	54	13	0	2	66	8	0	2	51	Y	59	0.192	0
+Contig37_chr13_42529793_42530857	150	G	T	192.0	chr13	42529926	G	18	0	2	81	14	0	2	69	16	0	2	75	14	0	2	69	8	0	2	51	11	0	2	60	N	22	0.795	5
+Contig47_chr13_47045833_47046626	257	A	C	28.5	chr13	47046097	A	13	0	2	66	10	0	2	57	17	0	2	78	20	0	2	87	15	0	2	72	9	0	2	57	N	129	0.468	0
+Contig42_chr13_47730018_47730856	254	A	G	75.1	chr13	47730294	A	13	0	2	66	6	0	2	45	12	0	2	63	9	0	2	54	16	0	2	75	11	0	2	63	Y	630	0.049	1
+Contig55_chr13_53467708_53468101	221	T	G	132.0	chr13	53467925	T	25	0	2	102	12	0	2	63	26	0	2	105	7	0	2	48	16	0	2	75	16	0	2	75	N	20	5.717	1
+Contig49_chr13_55103679_55105532	503	G	A	76.0	chr13	55104178	G	21	0	2	90	19	0	2	84	18	0	2	81	20	0	2	87	8	9	1	89	17	0	2	78	Y	20	0.259	1
+Contig66_chr13_66021813_66022244	319	C	T	125.0	chr13	66022136	C	11	0	2	60	16	0	2	75	15	0	2	75	12	0	2	63	17	0	2	78	8	0	2	51	N	14	0.055	3
+Contig48_chr14_11839435_11843272	3014	A	G	163.0	chr14	11842446	A	10	0	2	57	8	0	2	51	13	0	2	66	10	0	2	57	5	0	2	42	10	0	2	57	Y	31	0.908	0
+Contig9_chr14_23353717_23354432	80	G	A	61.3	chr14	23353797	G	3	0	2	36	6	0	2	45	11	0	2	60	8	0	2	51	4	0	2	39	2	4	1	35	Y	11	0.444	0
+Contig14_chr14_24131180_24133488	1633	G	A	131.0	chr14	24132818	G	21	0	2	90	16	0	2	75	12	0	2	63	10	0	2	57	11	0	2	60	20	0	2	87	Y	36	0.347	0
+Contig28_chr14_26905747_26909514	975	G	C	3.13	chr14	26906723	G	16	0	2	75	10	0	2	57	12	0	2	63	15	0	2	72	10	0	2	57	7	0	2	48	N	287	0.117	2
+Contig14_chr14_29616948_29618316	109	G	A	80.3	chr14	29617053	-	17	0	2	78	16	0	2	75	16	0	2	75	10	0	2	57	17	0	2	78	19	0	2	84	Y	32	1.051	0
+Contig76_chr14_30028102_30029179	1046	C	T	38.5	chr14	30029169	T	3	0	2	36	6	0	2	45	9	0	2	54	7	0	2	48	9	0	2	54	8	0	2	51	Y	96	+99.	0
+Contig115_chr14_31417207_31417574	259	A	G	12.1	chr14	31417454	G	13	0	2	66	15	0	2	72	21	0	2	90	12	0	2	63	13	0	2	66	9	0	2	54	N	28	5.379	2
+Contig70_chr14_46653662_46653790	111	G	A	46.7	chr14	46653768	G	7	0	2	48	5	0	2	42	11	0	2	60	11	0	2	60	8	0	2	51	10	0	2	57	N	21	+99.	2
+Contig43_chr14_49991855_49993511	918	A	G	112.0	chr14	49992767	G	15	0	2	72	10	0	2	57	11	0	2	63	9	0	2	54	12	0	2	63	9	0	2	54	Y	6	0.314	1
+Contig64_chr14_56768376_56768902	473	C	T	29.0	chr14	56768832	C	15	0	2	72	11	0	2	60	14	0	2	69	14	0	2	69	7	0	2	48	9	0	2	54	Y	91	8.281	0
+Contig60_chr15_18493036_18494316	150	G	A	92.6	chr15	18493188	G	9	0	2	54	13	0	2	66	9	0	2	54	6	0	2	45	5	0	2	42	12	0	2	63	Y	45	0.125	0
+Contig59_chr15_22138344_22138535	120	G	C	142.0	chr15	22138470	C	11	0	2	60	10	0	2	57	18	0	2	81	4	0	2	39	10	0	2	57	15	0	2	72	N	8	2.553	0
+Contig112_chr15_26772864_26773267	374	C	T	21.6	chr15	26773244	C	4	0	2	39	4	0	2	39	5	0	2	42	2	0	2	33	4	0	2	39	3	0	2	36	N	18	+99.	0
+Contig24_chr15_26894765_26895003	155	G	A	87.6	chr15	-1	N	6	0	2	45	5	0	2	42	7	0	2	48	4	0	2	39	4	0	2	39	2	0	2	33	N	-1	0.178	0
+Contig2_chr15_33944796_33947182	1860	G	A	99.5	chr15	33946654	G	10	0	2	57	11	0	2	60	16	0	2	75	14	0	2	69	14	0	2	69	16	0	2	75	Y	16	0.252	0
+Contig73_chr15_34690052_34691332	714	T	C	130.0	chr15	34690769	T	7	0	2	48	7	0	2	48	17	0	2	78	9	0	2	54	9	0	2	54	4	0	2	39	Y	7	6.003	0
+Contig68_chr15_37747190_37747426	126	G	A	130.0	chr15	37747331	G	14	0	2	69	14	0	2	69	11	0	2	63	19	0	2	84	13	0	2	66	21	0	2	90	N	229	0.255	0
+Contig104_chr15_45106954_45107158	70	A	T	64.4	chr15	45107015	A	6	0	2	45	6	0	2	45	19	0	2	84	7	0	2	48	7	0	2	48	3	0	2	36	N	202	4.319	0
+Contig119_chr16_6160274_6160477	180	G	A	54.8	chr16	6160457	G	7	0	2	48	6	0	2	45	12	0	2	63	3	0	2	36	11	0	2	60	10	0	2	57	N	42	+99.	0
+Contig126_chr16_10611887_10612152	150	G	T	145.0	chr16	10612037	G	14	0	2	69	9	0	2	54	11	0	2	63	8	0	2	51	8	0	2	51	11	0	2	60	N	15	0.104	6
+Contig43_chr16_20200090_20200514	70	A	G	58.6	chr16	20200154	A	11	0	2	60	15	0	2	72	15	0	2	72	6	0	2	45	9	0	2	54	12	0	2	63	Y	2	0.466	1
+Contig60_chr16_28079136_28080263	588	T	G	157.0	chr16	28079739	T	22	0	2	93	20	0	2	87	22	0	2	93	17	0	2	78	12	0	2	63	10	0	2	57	Y	105	5.999	1
+Contig70_chr16_33758668_33759655	104	A	T	58.1	chr16	33758772	A	6	0	2	45	7	0	2	48	17	0	2	78	14	0	2	69	8	0	2	51	10	0	2	57	N	54	0.162	0
+Contig66_chr16_37935682_37935831	116	T	C	99.2	chr16	37935802	C	12	0	2	63	6	0	2	45	19	0	2	84	12	0	2	63	13	0	2	66	17	0	2	78	N	266	+99.	2
+Contig16_chr16_40451506_40451643	84	A	G	59.8	chr16	40451592	A	7	0	2	48	5	0	2	42	7	0	2	48	13	0	2	66	14	0	2	69	19	0	2	84	N	45	5.061	0
+Contig31_chr17_12128267_12129637	205	G	A	90.5	chr17	12128484	G	7	0	2	48	6	0	2	45	6	0	2	45	11	0	2	60	7	0	2	48	4	0	2	39	Y	10	0.246	0
+Contig1_chr17_12979232_12980380	808	G	T	12.3	chr17	12980028	G	18	0	2	81	12	0	2	63	21	0	2	90	13	0	2	66	22	0	2	93	18	0	2	81	Y	9	0.336	1
+Contig42_chr17_23434859_23438330	2100	C	T	39.5	chr17	23436985	T	4	0	2	39	7	0	2	48	7	0	2	48	3	0	2	36	6	0	2	45	2	0	2	33	Y	25	0.344	0
+Contig63_chr17_23796320_23796814	220	A	G	54.0	chr17	23796536	G	6	0	2	45	4	0	2	39	5	0	2	42	6	0	2	45	4	0	2	39	6	0	2	45	Y	139	0.067	1
+Contig76_chr17_24107434_24107834	316	T	C	141.0	chr17	24107726	T	19	0	2	84	15	0	2	72	20	0	2	87	16	0	2	75	11	0	2	60	18	0	2	81	Y	30	0.175	2
+Contig99_chr17_26021506_26022200	505	C	T	88.8	chr17	26022017	T	15	0	2	72	13	0	2	66	19	0	2	84	9	0	2	54	10	0	2	57	11	0	2	60	Y	1	0.172	1
+Contig59_chr17_26790302_26795045	287	C	T	45.1	chr17	26790582	C	8	0	2	51	6	0	2	45	13	0	2	66	6	0	2	45	15	0	2	72	12	0	2	63	Y	75	0.019	1
+Contig99_chr17_27018324_27019378	446	G	A	31.1	chr17	27018776	G	14	0	2	69	12	0	2	63	14	0	2	69	10	0	2	57	9	0	2	54	11	0	2	60	Y	13	0.290	4
+Contig125_chr17_27739115_27739410	63	G	A	107.0	chr17	27739177	G	8	0	2	51	11	0	2	60	16	0	2	75	8	0	2	51	4	0	2	39	15	0	2	72	N	100	0.819	0
+Contig115_chr17_37489899_37490101	159	G	A	62.4	chr17	37490067	G	4	0	2	39	3	0	2	36	4	0	2	39	4	0	2	39	3	0	2	36	6	0	2	45	N	4	1.411	1
+Contig180_chr17_45154356_45154925	524	A	G	146.0	chr17	45154886	G	7	0	2	48	9	0	2	54	7	0	2	48	9	0	2	54	4	0	2	39	8	0	2	51	Y	11	+99.	2
+Contig61_chr17_48221795_48223545	1404	T	A	177.0	chr17	48223216	T	15	0	2	72	14	0	2	69	24	0	2	99	17	0	2	78	18	0	2	81	24	0	2	99	Y	161	0.633	2
+Contig27_chr17_61713766_61716585	1056	G	C	40.0	chr17	61714821	G	4	0	2	39	8	0	2	51	10	0	2	57	6	0	2	45	6	0	2	45	3	0	2	36	N	6	2.200	4
+Contig229_chr18_3706523_3708577	1076	A	G	83.9	chr18	3707630	A	11	0	2	60	13	0	2	66	26	0	2	105	11	0	2	60	15	0	2	72	17	0	2	78	Y	63	0.445	0
+Contig24_chr18_14049894_14050480	24	A	G	123.0	chr18	14049918	A	5	0	2	42	5	0	2	42	4	0	2	39	6	0	2	45	7	0	2	48	5	0	2	42	Y	17	+99.	0
+Contig123_chr18_19916160_19916379	116	G	A	79.2	chr18	19916272	A	14	0	2	69	12	0	2	63	14	0	2	69	6	0	2	45	11	0	2	60	10	0	2	57	N	26	0.172	0
+Contig82_chr18_27305489_27306229	566	C	T	49.5	chr18	27306051	A	6	0	2	45	6	0	2	45	10	0	2	57	11	0	2	60	6	0	2	45	7	0	2	48	N	1	0.349	0
+Contig71_chr18_34324706_34326687	136	G	A	151.0	chr18	34324841	G	9	0	2	54	9	0	2	54	17	0	2	78	8	0	2	51	11	0	2	60	10	0	2	57	Y	2	2.129	2
+Contig16_chr18_34672093_34673044	538	T	C	58.2	chr18	34672635	T	8	0	2	51	15	0	2	72	16	0	2	75	15	0	2	72	9	0	2	57	18	0	2	81	Y	8	0.214	1
+Contig96_chr18_38492535_38493333	624	G	A	119.0	chr18	38493162	T	17	0	2	78	12	0	2	63	13	0	2	66	16	0	2	75	8	0	2	51	15	0	2	72	Y	127	0.131	0
+Contig226_chr18_47753756_47754666	427	T	C	21.1	chr18	47754215	T	10	0	2	57	4	0	2	39	8	0	2	51	5	0	2	42	6	0	2	45	7	0	2	48	Y	42	0.522	0
+Contig170_chr18_49411558_49412230	94	C	A	74.3	chr18	49411655	C	14	0	2	69	10	0	2	57	9	0	2	54	10	0	2	57	3	0	2	36	3	0	2	36	N	9	1.457	0
+Contig192_chr18_49419342_49420737	1058	C	T	42.8	chr18	49420381	A	3	0	2	36	4	0	2	39	5	0	2	42	8	0	2	51	3	0	2	36	3	0	2	36	Y	34	2.107	2
+Contig64_chr18_55979770_55980315	49	G	A	89.1	chr18	55979824	G	3	0	2	36	9	0	2	54	7	0	2	51	4	0	2	39	3	0	2	36	3	0	2	36	Y	-1	2.124	0
+Contig20_chr18_58130301_58130735	112	A	G	74.4	chr18	58130413	A	12	0	2	66	11	0	2	60	11	0	2	60	12	0	2	63	6	0	2	45	6	0	2	45	Y	10	0.290	0
+Contig67_chr19_12398520_12399367	499	C	T	161.0	chr19	12399017	C	10	0	2	57	11	0	2	60	20	0	2	87	14	0	2	69	24	0	2	99	8	0	2	51	Y	137	5.634	0
+Contig66_chr19_16285672_16287223	996	C	T	190.0	chr19	16286674	C	9	0	2	57	14	0	2	69	16	0	2	78	17	0	2	78	8	0	2	51	22	0	2	93	Y	40	0.110	0
+Contig129_chr19_25541958_25542221	202	T	C	68.1	chr19	25542154	C	11	0	2	60	19	0	2	84	10	0	2	60	17	0	2	78	9	0	2	54	12	0	2	63	N	-1	2.551	1
+Contig29_chr19_37339947_37341911	1692	C	T	211.0	chr19	37341631	C	15	0	2	72	20	0	2	87	11	0	2	60	15	0	2	72	3	0	2	36	12	0	2	63	Y	7	0.096	0
+Contig39_chr19_47709708_47711327	444	C	T	36.8	chr19	47710148	T	10	0	2	57	4	0	2	39	8	0	2	51	9	0	2	54	6	0	2	45	6	0	2	45	Y	95	1.251	1
+Contig60_chr19_54013816_54014398	281	A	G	138.0	chr19	54014103	C	6	0	2	45	15	0	2	72	7	0	2	48	10	0	2	57	15	0	2	72	10	0	2	57	Y	188	1.271	0
+Contig251_chr19_56559098_56559626	452	T	C	3.36	chr19	56559549	T	12	0	2	63	13	0	2	66	21	0	2	90	15	0	2	72	14	0	2	69	11	0	2	60	N	1	0.117	0
+Contig50_chr20_12138509_12141975	3206	C	A	248.0	chr20	12141763	C	8	0	2	51	15	0	2	72	14	0	2	69	6	0	2	45	10	0	2	57	7	0	2	48	Y	2	0.384	0
+Contig36_chr20_32631363_32632049	176	G	A	24.1	chr20	32631526	G	7	0	2	48	14	0	2	69	19	0	2	84	14	0	2	69	15	0	2	72	16	0	2	75	N	50	1.150	0
+Contig32_chr20_36468058_36468869	66	C	T	40.4	chr20	36468127	C	6	0	2	45	3	0	2	36	4	0	2	39	5	0	2	42	3	0	2	36	4	0	2	39	N	59	0.281	0
+Contig24_chr20_38203888_38204900	834	C	T	132.0	chr20	38204731	C	9	0	2	54	17	0	2	78	20	0	2	87	8	0	2	51	11	0	2	60	17	0	2	78	Y	14	0.397	0
+Contig79_chr20_44263127_44264103	456	G	T	31.5	chr20	44263573	G	22	0	2	93	16	0	2	75	15	0	2	72	19	0	2	84	13	0	2	66	26	0	2	105	Y	8	3.250	0
+Contig26_chr20_45878482_45878787	197	A	G	160.0	chr20	45878672	A	17	0	2	78	15	0	2	72	11	0	2	63	17	0	2	78	12	0	2	63	10	0	2	57	N	14	0.535	0
+Contig119_chr20_46550670_46551383	609	G	A	139.0	chr20	46551277	G	7	0	2	48	17	0	2	78	19	0	2	84	20	0	2	87	9	0	2	54	15	0	2	72	Y	7	0.488	1
+Contig50_chr21_4178523_4178687	121	G	A	362.0	chr21	4178640	G	8	0	2	51	14	0	2	69	5	0	2	42	3	0	2	36	11	0	2	60	4	0	2	39	N	392	0.483	0
+Contig103_chr21_10177255_10177765	121	G	A	125.0	chr21	10177367	G	12	0	2	63	10	0	2	57	10	0	2	57	17	0	2	78	14	0	2	69	7	0	2	51	Y	37	0.213	3
+Contig1_chr21_10805534_10806399	766	A	G	146.0	chr21	10806301	G	10	0	2	57	6	0	2	45	9	0	2	54	6	0	2	45	7	0	2	48	5	0	2	42	Y	20	0.319	0
+Contig46_chr21_21029492_21030645	443	C	T	5.37	chr21	21029910	C	15	0	2	72	11	0	2	60	16	0	2	75	15	0	2	72	13	0	2	66	6	0	2	45	Y	96	3.737	0
+Contig129_chr21_31045749_31046924	381	A	G	129.0	chr21	31046141	A	19	0	2	84	8	0	2	51	23	0	2	96	12	0	2	63	15	0	2	72	18	0	2	81	Y	69	0.028	2
+Contig23_chr21_31651123_31651986	840	C	T	71.3	chr21	31651957	T	6	0	2	45	9	0	2	54	8	0	2	51	10	0	2	57	4	0	2	39	7	0	2	48	Y	105	2.977	3
+Contig64_chr21_43341847_43342031	84	T	C	114.0	chr21	43341926	T	11	0	2	60	9	0	2	54	10	0	2	57	6	0	2	45	6	0	2	45	7	0	2	48	N	10	3.954	2
+Contig60_chr21_43475347_43475824	175	C	T	8.05	chr21	43475551	T	6	0	2	45	7	0	2	48	13	0	2	66	6	0	2	45	14	0	2	69	14	0	2	69	N	45	0.058	0
+Contig159_chr22_7896450_7896974	109	G	C	151.0	chr22	7896570	G	16	0	2	75	5	7	1	62	14	0	2	69	16	0	2	75	13	0	2	66	13	0	2	66	Y	16	0.465	0
+Contig46_chr22_9416920_9417467	381	G	A	145.0	chr22	9417259	G	10	0	2	57	9	0	2	54	10	0	2	57	6	0	2	45	13	0	2	66	7	0	2	48	Y	154	0.242	0
+Contig86_chr22_9440787_9441725	713	T	G	119.0	chr22	9441488	G	6	0	2	45	12	0	2	63	10	0	2	57	11	0	2	60	13	0	2	66	16	0	2	75	Y	132	0.218	0
+Contig16_chr22_15636960_15637372	236	A	C	9.79	chr22	15637192	T	4	0	2	39	5	0	2	42	12	0	2	63	7	0	2	48	6	0	2	45	11	0	2	60	Y	5	2.163	0
+Contig4_chr22_16114310_16114546	128	G	C	101.0	chr22	16114432	G	10	0	2	57	13	0	2	66	20	0	2	87	20	0	2	87	16	0	2	75	9	0	2	54	N	19	0.526	0
+Contig23_chr22_34612023_34612568	167	C	G	92.3	chr22	34612181	C	11	0	2	60	18	0	2	81	13	0	2	66	8	0	2	51	12	0	2	63	14	0	2	69	Y	7	0.409	0
+Contig4_chr22_38252245_38253712	799	A	C	159.0	chr22	38253064	A	18	0	2	81	15	0	2	72	15	0	2	72	20	0	2	87	27	0	2	108	15	0	2	72	Y	90	4.330	0
+Contig122_chr22_48412466_48414788	1888	C	T	125.0	chr22	48414355	T	16	0	2	75	15	0	2	72	16	0	2	75	14	0	2	72	12	0	2	63	7	0	2	48	N	42	0.122	0
+Contig77_chr22_49764414_49764875	353	C	A	148.0	chr22	49764777	C	7	4	1	65	18	0	2	81	16	0	2	75	20	0	2	87	4	3	1	52	9	4	1	67	Y	12	0.941	0
+Contig26_chr22_57817664_57819633	1453	A	G	150.0	chr22	57819121	G	9	0	2	54	9	0	2	54	13	0	2	66	15	0	2	72	11	0	2	60	14	0	2	69	N	15	0.471	1
+Contig348_chr22_62406104_62406495	189	C	A	134.0	chr22	62406302	A	9	0	2	54	14	0	2	69	11	0	2	60	10	0	2	57	12	0	2	63	6	0	2	45	Y	5	0.912	0
+Contig133_chr23_3525134_3526502	1223	A	G	201.0	chr23	3526387	A	11	0	2	60	13	0	2	66	23	0	2	96	21	0	2	90	13	0	2	66	10	0	2	57	Y	61	1.359	0
+Contig111_chr23_7058063_7058181	107	G	A	108.0	chr23	7058162	A	8	0	2	51	8	0	2	51	7	0	2	48	2	0	2	33	5	0	2	42	6	0	2	45	N	3	+99.	0
+Contig79_chr23_7844129_7844837	110	C	A	141.0	chr23	7844237	T	13	0	2	66	15	0	2	72	17	0	2	78	12	0	2	63	15	0	2	72	16	0	2	75	Y	40	0.339	0
+Contig38_chr23_9201002_9201725	597	C	T	155.0	chr23	9201609	T	17	0	2	78	8	0	2	51	13	0	2	66	5	0	2	42	11	0	2	60	7	0	2	48	Y	167	0.633	1
+Contig33_chr23_20672540_20674320	347	T	A	91.4	chr23	20672885	A	11	0	2	60	14	0	2	69	15	0	2	72	7	0	2	48	12	0	2	63	18	0	2	81	Y	31	0.452	1
+Contig35_chr23_28447813_28449115	70	T	A	21.3	chr23	28447881	T	9	0	2	54	8	0	2	51	10	0	2	57	9	0	2	54	10	0	2	57	12	0	2	63	N	251	0.163	1
+Contig51_chr23_30590939_30591162	140	C	T	142.0	chr23	30591080	C	14	0	2	69	4	0	2	39	10	0	2	57	12	0	2	63	14	0	2	69	4	0	2	39	N	13	1.658	0
+Contig57_chr23_32216351_32216721	179	T	G	143.0	chr23	32216534	T	15	0	2	72	15	0	2	72	23	0	2	96	13	0	2	66	16	0	2	75	15	0	2	72	N	32	1.387	1
+Contig93_chr23_35744841_35745791	40	A	T	30.4	chr23	35744880	T	6	0	2	45	7	0	2	48	7	0	2	48	2	0	2	33	5	0	2	42	5	0	2	42	Y	50	2.173	0
+Contig32_chr23_48285289_48286638	186	T	C	176.0	chr23	48285470	T	18	0	2	81	12	0	2	63	16	0	2	75	13	0	2	66	9	0	2	54	9	0	2	54	Y	4	4.238	1
+Contig50_chr24_22515247_22516072	761	C	T	243.0	chr24	22515981	T	11	0	2	60	10	0	2	57	8	0	2	51	9	0	2	54	18	0	2	81	8	0	2	51	Y	1	0.190	0
+Contig84_chr24_29196623_29199644	466	C	T	126.0	chr24	29197091	T	7	0	2	48	11	0	2	60	8	0	2	51	7	0	2	48	11	0	2	60	15	0	2	72	Y	42	0.215	0
+Contig145_chr24_34778364_34778898	163	T	C	372.0	chr24	34778541	C	10	0	2	57	8	0	2	51	12	0	2	63	12	0	2	63	6	1	2	31	7	0	2	48	Y	40	0.037	0
+Contig34_chr24_36147443_36150244	2679	C	T	140.0	chr24	36150125	C	13	0	2	66	7	0	2	48	14	0	2	69	14	0	2	69	10	0	2	57	13	0	2	66	N	282	0.099	1
+Contig164_chr24_46598127_46599206	84	C	T	105.0	chr24	46598214	C	13	0	2	66	12	0	2	63	15	0	2	72	15	0	2	72	11	0	2	60	8	0	2	51	Y	22	1.262	1
+Contig144_chr25_4011170_4013134	541	A	G	160.0	chr25	4011690	A	12	0	2	63	17	0	2	78	13	0	2	66	13	0	2	66	13	0	2	66	13	0	2	66	Y	5	0.087	0
+Contig81_chr25_6103472_6104760	699	G	A	378.0	chr25	6104190	A	14	0	2	69	16	0	2	75	13	0	2	66	11	0	2	60	11	0	2	60	12	0	2	63	Y	33	0.789	2
+Contig152_chr25_7486442_7487609	75	A	G	11.6	chr25	7486515	A	17	0	2	78	13	0	2	66	8	0	2	51	16	0	2	75	8	0	2	51	6	0	2	45	N	2	0.158	0
+Contig24_chr25_7695778_7698612	2714	C	T	130.0	chr25	7698446	C	16	0	2	75	13	0	2	66	22	0	2	93	17	0	2	78	10	0	2	57	17	0	2	78	Y	27	0.346	0
+Contig89_chr25_8635170_8636009	586	G	C	209.0	chr25	8635744	G	13	0	2	66	13	0	2	66	21	0	2	93	14	0	2	69	15	0	2	72	15	0	2	72	Y	14	0.067	0
+Contig59_chr25_18196776_18197707	785	G	A	112.0	chr25	18197551	G	8	10	1	42	27	0	2	108	21	0	2	90	18	0	2	81	10	0	2	57	14	0	2	69	N	36	3.625	0
+Contig103_chr25_38891221_38892140	407	G	A	131.0	chr25	38891644	G	8	0	2	51	14	0	2	69	18	0	2	81	8	0	2	51	8	0	2	51	11	0	2	60	Y	149	0.167	4
+Contig84_chr25_42407960_42408708	55	C	T	119.0	chr25	42408013	C	6	0	2	45	9	0	2	54	11	0	2	60	9	0	2	54	7	0	2	48	8	0	2	51	Y	11	0.121	0
+Contig73_chr25_43562500_43564110	955	T	C	52.1	chr25	43563469	C	9	0	2	57	4	0	2	39	6	0	2	45	5	0	2	42	7	0	2	48	10	0	2	57	Y	4	1.406	0
+Contig37_chr25_51074433_51074885	170	A	G	102.0	chr25	51074589	G	11	0	2	60	7	0	2	48	6	0	2	45	15	0	2	72	9	0	2	54	7	0	2	48	Y	68	0.207	1
+Contig204_chr26_4311195_4311778	170	C	T	16.9	chr26	4311363	T	20	0	2	87	8	0	2	51	13	0	2	66	18	0	2	81	11	0	2	60	14	0	2	69	N	35	0.085	0
+Contig122_chr26_7622321_7623491	106	C	G	139.0	chr26	7622423	C	3	0	2	36	9	0	2	54	10	0	2	57	12	0	2	63	9	0	2	54	5	0	2	42	N	19	0.458	0
+Contig11_chr26_11062142_11062902	707	C	A	108.0	chr26	11062836	T	7	0	2	48	8	0	2	51	16	0	2	75	10	0	2	57	6	0	2	45	14	0	2	69	Y	-1	4.709	0
+Contig133_chr26_17695661_17696368	39	T	G	98.7	chr26	17695700	T	10	0	2	57	3	0	2	36	11	0	2	60	9	0	2	54	2	0	2	33	1	0	2	30	N	85	3.402	0
+Contig146_chr26_26622638_26623906	574	G	A	186.0	chr26	26623219	A	11	0	2	60	12	0	2	63	9	0	2	54	11	0	2	60	9	0	2	54	12	0	2	63	Y	1	0.318	0
+Contig8_chr26_27834126_27834326	140	G	A	41.7	chr26	27834268	G	13	0	2	66	7	0	2	48	13	0	2	66	11	0	2	60	12	0	2	63	6	0	2	45	N	29	0.142	1
+Contig78_chr26_31128839_31129005	123	T	C	145.0	chr26	-1	N	11	0	2	60	3	0	2	36	7	0	2	48	8	0	2	51	10	0	2	46	7	0	2	48	N	-1	1.230	1
+Contig28_chr26_32935355_32935833	289	T	C	77.9	chr26	32935638	T	15	0	2	72	22	0	2	93	15	0	2	72	9	0	2	54	15	0	2	72	17	0	2	78	Y	10	2.258	1
+Contig135_chr27_6853874_6854079	158	C	T	116.0	chr27	6854032	T	18	0	2	81	19	0	2	84	13	0	2	66	7	0	2	48	8	0	2	51	11	0	2	60	N	4	0.060	1
+Contig47_chr27_11777710_11777915	25	A	G	67.3	chr27	11777731	A	3	0	2	36	5	0	2	42	6	0	2	45	10	0	2	57	9	0	2	54	6	0	2	45	N	97	+99.	0
+Contig23_chr27_14633002_14633153	23	G	A	128.0	chr27	14633023	A	3	0	2	36	4	0	2	39	5	0	2	42	5	0	2	42	3	0	2	36	2	0	2	33	N	240	3.881	0
+Contig29_chr27_15428166_15429413	380	T	C	140.0	chr27	15428539	T	15	0	2	72	15	0	2	72	17	0	2	78	15	0	2	72	15	0	2	72	15	0	2	72	Y	47	0.916	1
+Contig31_chr27_19519489_19520891	129	G	T	14.9	chr27	19519624	T	12	0	2	63	19	0	2	84	20	0	2	87	16	0	2	75	10	0	2	57	11	0	2	60	Y	48	2.756	0
+Contig35_chr27_40596169_40596445	20	G	C	133.0	chr27	40596189	G	8	0	2	51	3	0	2	36	4	0	2	39	2	0	2	33	4	0	2	39	4	0	2	39	Y	4	+99.	1
+Contig85_chr27_45471750_45472022	211	G	A	53.1	chr27	45471964	G	18	0	2	81	10	0	2	57	15	0	2	72	0	13	0	36	16	0	2	75	14	0	2	69	N	75	2.502	1
+Contig131_chr28_6481806_6483783	138	C	T	36.2	chr28	6481953	C	12	0	2	63	12	0	2	63	20	0	2	87	11	0	2	60	10	0	2	57	12	0	2	63	Y	10	0.387	0
+Contig141_chr28_10027332_10028242	780	T	G	74.8	chr28	10028095	T	10	0	2	57	11	0	2	60	14	0	2	69	10	0	2	57	7	0	2	48	9	0	2	54	Y	19	3.348	0
+Contig144_chr28_15468203_15470548	743	G	A	20.0	chr28	15468942	G	13	0	2	66	12	0	2	63	10	0	2	57	11	0	2	60	16	0	2	75	7	0	2	48	N	14	0.053	0
+Contig47_chr28_21311718_21312366	541	G	A	116.0	chr28	21312258	G	9	0	2	54	6	0	2	45	12	0	2	63	6	0	2	45	5	0	2	45	12	0	2	63	N	9	0.240	0
+Contig60_chr28_30197166_30197364	92	T	C	164.0	chr28	30197258	T	10	0	2	57	13	0	2	66	15	0	2	72	16	0	2	75	12	0	2	63	11	0	2	60	N	369	1.139	0
+Contig29_chr29_4726399_4727143	559	A	T	163.0	chr29	4726955	A	15	0	2	72	18	0	2	81	18	0	2	81	16	0	2	75	11	0	2	60	14	0	2	72	Y	161	3.114	0
+Contig48_chr29_13129286_13130137	232	A	G	92.2	chr29	13129514	G	13	0	2	66	11	0	2	60	19	0	2	84	16	0	2	75	11	0	2	60	17	0	2	78	Y	337	2.581	1
+Contig33_chr29_17000374_17000921	71	C	T	48.6	chr29	17000441	-	4	0	2	39	9	0	2	54	12	0	2	66	10	0	2	57	7	0	2	48	4	0	2	39	N	26	5.491	0
+Contig34_chr29_17581796_17584016	2105	C	T	126.0	chr29	17583890	T	14	0	2	69	11	0	2	60	18	0	2	81	12	0	2	63	10	0	2	57	10	0	2	57	Y	22	2.208	0
+Contig19_chr29_20976080_20977761	1007	G	A	115.0	chr29	20977076	G	19	0	2	84	22	0	2	93	22	0	2	93	22	0	2	93	11	0	2	60	13	0	2	66	Y	4	1.915	0
+Contig51_chr29_21149853_21150467	266	C	T	146.0	chr29	21150118	C	12	0	2	63	12	0	2	63	23	0	2	96	14	0	2	69	13	0	2	66	10	0	2	57	Y	4	0.051	0
+Contig1_chr30_5992217_5993068	106	C	T	129.0	chr30	5992319	C	10	0	2	57	11	0	2	60	7	0	2	48	11	0	2	60	10	0	2	57	12	0	2	63	Y	76	1.079	0
+Contig1_chr30_8232878_8233406	402	C	T	127.0	chr30	8233264	C	8	0	2	51	19	0	2	84	16	0	2	75	18	0	2	81	10	0	2	57	14	0	2	69	Y	358	5.283	0
+Contig108_chr30_9436961_9437520	546	C	T	39.8	chr30	9437502	C	7	0	2	48	5	0	2	42	2	0	2	33	7	0	2	48	5	0	2	42	7	0	2	48	Y	64	+99.	0
+Contig165_chr30_25804389_25804926	190	T	C	126.0	chr30	25804592	C	3	0	2	36	8	0	2	51	7	0	2	48	10	0	2	57	7	0	2	48	4	0	2	39	Y	113	0.329	0
+Contig193_chr30_27495616_27496125	434	C	A	234.0	chr30	27496024	C	13	0	2	66	16	0	2	75	25	0	2	102	16	0	2	75	13	0	2	66	14	0	2	69	Y	76	2.621	0
+Contig38_chr31_5164423_5166573	2074	C	T	134.0	chr31	5166501	T	13	0	2	66	10	0	2	57	17	0	2	78	11	0	2	60	17	0	2	78	10	0	2	57	Y	58	+99.	0
+Contig6_chr31_9649308_9650149	431	G	T	162.0	chr31	9649742	G	31	0	2	120	23	0	2	96	17	0	2	78	17	0	2	78	10	0	2	57	16	0	2	75	Y	98	2.200	0
+Contig7_chr31_12384974_12386400	305	C	T	69.6	chr31	12385267	C	6	0	2	45	10	0	2	57	11	0	2	60	11	0	2	60	9	0	2	54	12	0	2	63	Y	44	1.165	0
+Contig90_chr31_17267583_17267778	81	C	A	143.0	chr31	17267665	C	20	0	2	87	6	0	2	45	14	0	2	72	22	0	2	93	17	0	2	78	15	0	2	72	N	7	0.565	0
+Contig137_chr31_23357653_23358568	885	G	A	119.0	chr31	23358545	G	5	0	2	42	3	0	2	36	3	0	2	36	2	0	2	33	3	0	2	36	4	0	2	39	Y	11	+99.	0
+Contig17_chr31_26433828_26434459	498	T	C	9.79	chr31	26434322	T	18	0	2	81	10	0	2	57	15	0	2	72	13	0	2	66	16	0	2	75	15	0	2	72	Y	137	4.814	0
+Contig30_chr32_25902721_25905783	208	C	G	162.0	chr32	25902927	G	11	0	2	60	13	0	2	66	11	0	2	60	12	0	2	63	7	0	2	48	11	0	2	60	Y	145	0.322	2
+Contig42_chr32_38900713_38901320	320	A	G	134.0	chr32	38901021	T	12	0	2	63	10	0	2	57	9	11	1	104	5	0	2	42	19	0	2	84	7	6	1	56	Y	71	0.165	0
+Contig18_chr33_22207246_22209159	1363	G	T	51.5	chr33	22208619	-	16	0	2	75	8	0	2	51	11	0	2	60	10	0	2	57	15	0	2	72	12	0	2	63	Y	59	2.560	0
+Contig104_chr33_22483642_22484187	424	C	T	140.0	chr33	22484054	T	13	0	2	66	16	0	2	75	9	0	2	54	15	0	2	72	13	0	2	66	10	0	2	57	Y	36	0.404	0
+Contig170_chr33_26189421_26189940	292	T	C	98.4	chr33	26189703	T	21	0	2	90	13	0	2	66	15	0	2	72	13	0	2	66	19	0	2	84	13	0	2	66	Y	23	0.307	0
+Contig41_chr34_16544482_16545449	46	T	C	102.0	chr34	16544523	T	5	0	2	42	11	0	2	60	6	0	2	45	0	2	0	3	7	0	2	48	8	0	2	51	Y	215	1.156	0
+Contig8_chr34_18474513_18475673	1122	C	A	129.0	chr34	18475628	A	8	0	2	51	15	0	2	72	13	0	2	66	17	0	2	78	13	0	2	66	6	0	2	45	Y	61	0.123	2
+Contig152_chr34_31794848_31795540	242	G	A	93.2	chr34	31795093	G	11	0	2	60	24	0	2	99	17	0	2	78	15	0	2	72	18	0	2	81	17	0	2	78	Y	123	2.780	0
+Contig28_chr34_41708848_41712034	1381	A	G	78.2	chr34	41710232	A	11	0	2	60	17	0	2	78	15	0	2	72	16	0	2	75	15	0	2	72	14	0	2	69	Y	236	0.234	0
+Contig85_chr34_42798284_42800584	1845	C	T	171.0	chr34	42800126	T	5	0	2	42	7	0	2	48	6	0	2	45	7	0	2	48	6	0	2	45	2	0	2	33	Y	5	2.787	0
+Contig47_chr35_3666773_3667898	348	G	T	124.0	chr35	3667121	G	9	0	2	54	20	0	2	87	18	0	2	81	15	0	2	72	12	0	2	63	14	0	2	69	Y	285	0.235	0
+Contig195_chr35_15722500_15722741	205	G	A	4.08	chr35	15722718	G	3	0	2	36	5	0	2	42	1	0	2	30	6	0	2	45	1	0	2	30	1	0	2	30	N	43	+99.	0
+Contig101_chr35_19513178_19513697	62	C	T	112.0	chr35	19513238	C	12	0	2	63	7	0	2	48	13	0	2	66	7	0	2	48	5	0	2	42	8	0	2	51	N	115	3.135	0
+Contig47_chr35_24382042_24382526	33	G	A	87.0	chr35	24382076	G	5	0	2	42	4	0	2	39	6	0	2	45	7	0	2	48	4	0	2	39	2	0	2	33	Y	71	+99.	0
+Contig77_chr35_24796947_24797172	65	A	G	52.1	chr35	24797009	A	7	0	2	48	5	0	2	42	8	0	2	51	6	0	2	45	12	0	2	63	10	0	2	57	N	11	1.401	3
+Contig74_chr35_25394343_25394813	303	A	T	221.0	chr35	25394646	G	23	0	2	96	15	0	2	72	25	0	2	105	7	7	1	49	18	0	2	81	16	0	2	75	Y	58	4.298	0
+Contig5_chr36_4562983_4563634	343	C	T	151.0	chr36	4563324	T	20	0	2	87	20	0	2	87	23	0	2	96	24	0	2	99	9	0	2	54	8	0	2	51	Y	40	1.169	0
+Contig75_chr36_7885319_7885588	53	G	A	25.7	chr36	7885372	G	10	0	2	57	8	0	2	51	13	0	2	66	7	0	2	48	4	0	2	39	7	0	2	48	N	7	2.653	0
+Contig184_chr36_18956191_18958552	187	A	G	11.5	chr36	18956371	G	10	0	2	57	11	0	2	60	21	0	2	90	14	0	2	69	7	0	2	48	4	0	2	39	N	278	1.434	2
+Contig12_chr36_21557176_21557828	513	T	A	159.0	chr36	21557695	A	11	0	2	60	14	0	2	69	21	0	2	90	12	0	2	63	15	0	2	72	11	0	2	60	Y	55	0.222	0
+Contig2_chr36_22436067_22436794	653	C	T	73.0	chr36	22436730	C	11	0	2	60	16	0	2	75	13	0	2	66	11	0	2	60	21	0	2	90	21	0	2	90	Y	9	0.534	0
+Contig133_chr36_32954045_32955409	136	A	G	116.0	chr36	32954182	A	16	0	2	75	15	0	2	72	20	0	2	87	11	0	2	60	18	0	2	81	13	0	2	66	Y	74	3.772	1
+Contig53_chr37_6665763_6665919	116	C	T	111.0	chr37	6665875	C	9	0	2	54	9	0	2	54	5	0	2	42	9	0	2	54	8	0	2	51	10	0	2	57	N	15	10.875	1
+Contig42_chr37_9589176_9591269	252	G	A	25.1	chr37	9589430	G	10	0	2	40	13	0	2	66	18	0	2	81	21	0	2	90	9	0	2	54	17	0	2	78	N	67	1.170	2
+Contig2_chr37_17134963_17136513	1140	A	C	158.0	chr37	17136092	A	14	0	2	69	24	0	2	99	17	0	2	78	16	0	2	75	15	0	2	75	13	0	2	66	Y	12	0.053	1
+Contig18_chr37_17147806_17149851	291	T	G	112.0	chr37	17148084	T	4	6	1	45	16	0	2	75	17	0	2	78	14	0	2	69	22	0	2	93	13	0	2	66	Y	41	4.442	0
+Contig64_chr37_17606895_17607534	565	C	T	30.2	chr37	17607439	A	9	0	2	54	16	0	2	75	20	0	2	87	14	0	2	69	16	0	2	75	10	0	2	57	N	20	1.622	0
+Contig126_chr37_21587881_21590621	373	G	T	132.0	chr37	21588256	G	11	0	2	60	11	0	2	60	23	0	2	96	12	0	2	63	8	0	2	51	18	0	2	81	Y	12	0.549	0
+Contig2_chr37_31197993_31198256	182	C	T	39.6	chr37	31198171	T	6	0	2	45	10	0	2	57	7	0	2	48	9	0	2	54	10	0	2	57	12	0	2	63	N	2	0.595	0
+Contig46_chr37_31852376_31853555	825	A	G	111.0	chr37	31853191	G	19	0	2	84	14	0	2	69	15	0	2	72	7	0	2	48	8	0	2	51	16	0	2	75	Y	17	0.128	1
+Contig7_chr38_12217200_12218387	1163	A	T	44.4	chr38	12218353	A	11	0	2	60	13	0	2	66	17	0	2	78	10	0	2	57	11	0	2	60	11	0	2	60	Y	67	+99.	0
+Contig15_chr38_12282020_12282253	150	C	T	156.0	chr38	12282164	A	17	0	2	78	11	0	2	60	19	0	2	84	14	0	2	69	5	0	2	42	14	0	2	69	Y	26	2.952	1
+Contig6_chr38_16185744_16186110	325	A	G	74.9	chr38	16186061	A	5	0	2	42	3	0	2	36	9	0	2	54	7	0	2	48	1	0	2	30	12	0	2	63	Y	40	+99.	0
+Contig265_chrX_2689247_2689484	114	C	G	103.0	chrX	2689356	C	11	0	2	60	9	0	2	54	13	0	2	66	16	0	2	75	14	0	2	69	10	0	2	57	N	2	9.232	1
+Contig122_chrX_6026976_6027327	330	C	T	79.4	chrX	6027303	C	3	0	2	36	3	0	2	36	3	0	2	36	4	0	2	39	3	0	2	36	6	0	2	45	Y	30	+99.	0
+Contig113_chrX_26287829_26288398	385	C	T	59.6	chrX	26288213	C	9	0	2	54	9	0	2	54	17	0	2	78	11	0	2	60	3	8	1	44	4	0	2	39	N	13	0.077	0
+Contig237_chrX_31256648_31257654	165	T	A	246.0	chrX	31256814	T	7	0	2	48	23	0	2	96	19	0	2	84	17	0	2	78	14	0	2	69	8	0	2	51	Y	37	1.481	0
+Contig90_chrX_57430715_57431566	548	C	T	116.0	chrX	57431266	T	9	0	2	54	18	0	2	81	13	0	2	66	14	0	2	69	8	0	2	54	7	0	2	48	Y	261	0.154	1
+Contig133_chrX_84833782_84834125	182	G	A	69.7	chrX	84833962	G	5	0	2	42	18	0	2	81	12	0	2	63	19	0	2	84	6	3	1	27	7	0	2	48	N	619	0.278	0
+Contig125_chrX_93319363_93320877	349	A	C	145.0	chrX	93319721	A	4	0	2	39	6	0	2	45	11	0	2	60	10	0	2	57	13	0	2	66	6	0	2	45	Y	59	1.686	0
Binary file genome_diversity/test-data/test_out/pathway_image/pathway_image.png has changed
Binary file genome_diversity/test-data/test_out/pca/PCA.pdf has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/pca/admix.gd_indivs	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,6 @@
+PB1 M All_Individuals
+PB2 M All_Individuals
+PB3 M All_Individuals
+PB4 M All_Individuals
+PB6 M All_Individuals
+PB8 M All_Individuals
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/pca/admix.gd_snp	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,303 @@
+  snp1 11 0.002 2000 A T
+  snp3 11 0.002 2000 A T
+  snp4 11 0.002 2000 A T
+  snp5 11 0.002 2000 A T
+  snp6 11 0.002 2000 A T
+  snp7 11 0.002 2000 A T
+  snp8 11 0.002 2000 A T
+  snp9 11 0.002 2000 A T
+  snp10 11 0.002 2000 A T
+  snp11 11 0.002 2000 A T
+  snp12 11 0.002 2000 A T
+  snp13 11 0.002 2000 A T
+  snp14 11 0.002 2000 A T
+  snp16 11 0.002 2000 A T
+  snp17 11 0.002 2000 A T
+  snp22 11 0.002 2000 A T
+  snp24 11 0.002 2000 A T
+  snp25 11 0.002 2000 A T
+  snp27 11 0.002 2000 A T
+  snp28 11 0.002 2000 A T
+  snp29 11 0.002 2000 A T
+  snp30 11 0.002 2000 A T
+  snp31 11 0.002 2000 A T
+  snp33 11 0.002 2000 A T
+  snp34 11 0.002 2000 A T
+  snp37 11 0.002 2000 A T
+  snp38 11 0.002 2000 A T
+  snp39 11 0.002 2000 A T
+  snp40 11 0.002 2000 A T
+  snp41 11 0.002 2000 A T
+  snp42 11 0.002 2000 A T
+  snp43 11 0.002 2000 A T
+  snp45 11 0.002 2000 A T
+  snp46 11 0.002 2000 A T
+  snp47 11 0.002 2000 A T
+  snp48 11 0.002 2000 A T
+  snp49 11 0.002 2000 A T
+  snp50 11 0.002 2000 A T
+  snp51 11 0.002 2000 A T
+  snp52 11 0.002 2000 A T
+  snp53 11 0.002 2000 A T
+  snp54 11 0.002 2000 A T
+  snp56 11 0.002 2000 A T
+  snp58 11 0.002 2000 A T
+  snp59 11 0.002 2000 A T
+  snp60 11 0.002 2000 A T
+  snp61 11 0.002 2000 A T
+  snp62 11 0.002 2000 A T
+  snp63 11 0.002 2000 A T
+  snp64 11 0.002 2000 A T
+  snp65 11 0.002 2000 A T
+  snp67 11 0.002 2000 A T
+  snp68 11 0.002 2000 A T
+  snp70 11 0.002 2000 A T
+  snp71 11 0.002 2000 A T
+  snp72 11 0.002 2000 A T
+  snp73 11 0.002 2000 A T
+  snp74 11 0.002 2000 A T
+  snp75 11 0.002 2000 A T
+  snp76 11 0.002 2000 A T
+  snp77 11 0.002 2000 A T
+  snp78 11 0.002 2000 A T
+  snp80 11 0.002 2000 A T
+  snp81 11 0.002 2000 A T
+  snp83 11 0.002 2000 A T
+  snp84 11 0.002 2000 A T
+  snp87 11 0.002 2000 A T
+  snp89 11 0.002 2000 A T
+  snp90 11 0.002 2000 A T
+  snp91 11 0.002 2000 A T
+  snp92 11 0.002 2000 A T
+  snp93 11 0.002 2000 A T
+  snp94 11 0.002 2000 A T
+  snp98 11 0.002 2000 A T
+  snp100 11 0.002 2000 A T
+  snp101 11 0.002 2000 A T
+  snp102 11 0.002 2000 A T
+  snp103 11 0.002 2000 A T
+  snp104 11 0.002 2000 A T
+  snp105 11 0.002 2000 A T
+  snp106 11 0.002 2000 A T
+  snp107 11 0.002 2000 A T
+  snp108 11 0.002 2000 A T
+  snp110 11 0.002 2000 A T
+  snp111 11 0.002 2000 A T
+  snp112 11 0.002 2000 A T
+  snp113 11 0.002 2000 A T
+  snp116 11 0.002 2000 A T
+  snp117 11 0.002 2000 A T
+  snp118 11 0.002 2000 A T
+  snp119 11 0.002 2000 A T
+  snp121 11 0.002 2000 A T
+  snp122 11 0.002 2000 A T
+  snp123 11 0.002 2000 A T
+  snp124 11 0.002 2000 A T
+  snp125 11 0.002 2000 A T
+  snp126 11 0.002 2000 A T
+  snp128 11 0.002 2000 A T
+  snp129 11 0.002 2000 A T
+  snp131 11 0.002 2000 A T
+  snp133 11 0.002 2000 A T
+  snp134 11 0.002 2000 A T
+  snp135 11 0.002 2000 A T
+  snp137 11 0.002 2000 A T
+  snp138 11 0.002 2000 A T
+  snp139 11 0.002 2000 A T
+  snp140 11 0.002 2000 A T
+  snp141 11 0.002 2000 A T
+  snp143 11 0.002 2000 A T
+  snp145 11 0.002 2000 A T
+  snp146 11 0.002 2000 A T
+  snp148 11 0.002 2000 A T
+  snp149 11 0.002 2000 A T
+  snp150 11 0.002 2000 A T
+  snp151 11 0.002 2000 A T
+  snp152 11 0.002 2000 A T
+  snp153 11 0.002 2000 A T
+  snp154 11 0.002 2000 A T
+  snp156 11 0.002 2000 A T
+  snp157 11 0.002 2000 A T
+  snp158 11 0.002 2000 A T
+  snp159 11 0.002 2000 A T
+  snp160 11 0.002 2000 A T
+  snp161 11 0.002 2000 A T
+  snp162 11 0.002 2000 A T
+  snp164 11 0.002 2000 A T
+  snp165 11 0.002 2000 A T
+  snp167 11 0.002 2000 A T
+  snp168 11 0.002 2000 A T
+  snp169 11 0.002 2000 A T
+  snp170 11 0.002 2000 A T
+  snp171 11 0.002 2000 A T
+  snp172 11 0.002 2000 A T
+  snp174 11 0.002 2000 A T
+  snp175 11 0.002 2000 A T
+  snp176 11 0.002 2000 A T
+  snp177 11 0.002 2000 A T
+  snp178 11 0.002 2000 A T
+  snp179 11 0.002 2000 A T
+  snp181 11 0.002 2000 A T
+  snp182 11 0.002 2000 A T
+  snp183 11 0.002 2000 A T
+  snp184 11 0.002 2000 A T
+  snp185 11 0.002 2000 A T
+  snp186 11 0.002 2000 A T
+  snp188 11 0.002 2000 A T
+  snp191 11 0.002 2000 A T
+  snp192 11 0.002 2000 A T
+  snp193 11 0.002 2000 A T
+  snp195 11 0.002 2000 A T
+  snp196 11 0.002 2000 A T
+  snp197 11 0.002 2000 A T
+  snp199 11 0.002 2000 A T
+  snp200 11 0.002 2000 A T
+  snp201 11 0.002 2000 A T
+  snp202 11 0.002 2000 A T
+  snp203 11 0.002 2000 A T
+  snp205 11 0.002 2000 A T
+  snp207 11 0.002 2000 A T
+  snp210 11 0.002 2000 A T
+  snp211 11 0.002 2000 A T
+  snp212 11 0.002 2000 A T
+  snp213 11 0.002 2000 A T
+  snp214 11 0.002 2000 A T
+  snp215 11 0.002 2000 A T
+  snp216 11 0.002 2000 A T
+  snp217 11 0.002 2000 A T
+  snp218 11 0.002 2000 A T
+  snp219 11 0.002 2000 A T
+  snp220 11 0.002 2000 A T
+  snp221 11 0.002 2000 A T
+  snp223 11 0.002 2000 A T
+  snp224 11 0.002 2000 A T
+  snp225 11 0.002 2000 A T
+  snp226 11 0.002 2000 A T
+  snp227 11 0.002 2000 A T
+  snp228 11 0.002 2000 A T
+  snp229 11 0.002 2000 A T
+  snp230 11 0.002 2000 A T
+  snp231 11 0.002 2000 A T
+  snp232 11 0.002 2000 A T
+  snp235 11 0.002 2000 A T
+  snp236 11 0.002 2000 A T
+  snp237 11 0.002 2000 A T
+  snp239 11 0.002 2000 A T
+  snp240 11 0.002 2000 A T
+  snp241 11 0.002 2000 A T
+  snp242 11 0.002 2000 A T
+  snp243 11 0.002 2000 A T
+  snp244 11 0.002 2000 A T
+  snp246 11 0.002 2000 A T
+  snp247 11 0.002 2000 A T
+  snp248 11 0.002 2000 A T
+  snp249 11 0.002 2000 A T
+  snp250 11 0.002 2000 A T
+  snp251 11 0.002 2000 A T
+  snp252 11 0.002 2000 A T
+  snp253 11 0.002 2000 A T
+  snp254 11 0.002 2000 A T
+  snp255 11 0.002 2000 A T
+  snp256 11 0.002 2000 A T
+  snp257 11 0.002 2000 A T
+  snp258 11 0.002 2000 A T
+  snp260 11 0.002 2000 A T
+  snp261 11 0.002 2000 A T
+  snp262 11 0.002 2000 A T
+  snp263 11 0.002 2000 A T
+  snp264 11 0.002 2000 A T
+  snp265 11 0.002 2000 A T
+  snp266 11 0.002 2000 A T
+  snp267 11 0.002 2000 A T
+  snp268 11 0.002 2000 A T
+  snp269 11 0.002 2000 A T
+  snp270 11 0.002 2000 A T
+  snp271 11 0.002 2000 A T
+  snp273 11 0.002 2000 A T
+  snp274 11 0.002 2000 A T
+  snp275 11 0.002 2000 A T
+  snp276 11 0.002 2000 A T
+  snp277 11 0.002 2000 A T
+  snp278 11 0.002 2000 A T
+  snp281 11 0.002 2000 A T
+  snp282 11 0.002 2000 A T
+  snp284 11 0.002 2000 A T
+  snp287 11 0.002 2000 A T
+  snp288 11 0.002 2000 A T
+  snp289 11 0.002 2000 A T
+  snp290 11 0.002 2000 A T
+  snp291 11 0.002 2000 A T
+  snp292 11 0.002 2000 A T
+  snp293 11 0.002 2000 A T
+  snp294 11 0.002 2000 A T
+  snp297 11 0.002 2000 A T
+  snp298 11 0.002 2000 A T
+  snp299 11 0.002 2000 A T
+  snp300 11 0.002 2000 A T
+  snp301 11 0.002 2000 A T
+  snp302 11 0.002 2000 A T
+  snp303 11 0.002 2000 A T
+  snp304 11 0.002 2000 A T
+  snp307 11 0.002 2000 A T
+  snp308 11 0.002 2000 A T
+  snp309 11 0.002 2000 A T
+  snp310 11 0.002 2000 A T
+  snp312 11 0.002 2000 A T
+  snp313 11 0.002 2000 A T
+  snp316 11 0.002 2000 A T
+  snp317 11 0.002 2000 A T
+  snp320 11 0.002 2000 A T
+  snp321 11 0.002 2000 A T
+  snp322 11 0.002 2000 A T
+  snp323 11 0.002 2000 A T
+  snp324 11 0.002 2000 A T
+  snp325 11 0.002 2000 A T
+  snp328 11 0.002 2000 A T
+  snp329 11 0.002 2000 A T
+  snp331 11 0.002 2000 A T
+  snp332 11 0.002 2000 A T
+  snp333 11 0.002 2000 A T
+  snp334 11 0.002 2000 A T
+  snp335 11 0.002 2000 A T
+  snp336 11 0.002 2000 A T
+  snp338 11 0.002 2000 A T
+  snp339 11 0.002 2000 A T
+  snp341 11 0.002 2000 A T
+  snp342 11 0.002 2000 A T
+  snp344 11 0.002 2000 A T
+  snp345 11 0.002 2000 A T
+  snp348 11 0.002 2000 A T
+  snp350 11 0.002 2000 A T
+  snp352 11 0.002 2000 A T
+  snp353 11 0.002 2000 A T
+  snp354 11 0.002 2000 A T
+  snp355 11 0.002 2000 A T
+  snp360 11 0.002 2000 A T
+  snp361 11 0.002 2000 A T
+  snp362 11 0.002 2000 A T
+  snp364 11 0.002 2000 A T
+  snp366 11 0.002 2000 A T
+  snp369 11 0.002 2000 A T
+  snp370 11 0.002 2000 A T
+  snp371 11 0.002 2000 A T
+  snp372 11 0.002 2000 A T
+  snp373 11 0.002 2000 A T
+  snp374 11 0.002 2000 A T
+  snp375 11 0.002 2000 A T
+  snp376 11 0.002 2000 A T
+  snp377 11 0.002 2000 A T
+  snp378 11 0.002 2000 A T
+  snp379 11 0.002 2000 A T
+  snp380 11 0.002 2000 A T
+  snp381 11 0.002 2000 A T
+  snp382 11 0.002 2000 A T
+  snp383 11 0.002 2000 A T
+  snp384 11 0.002 2000 A T
+  snp385 11 0.002 2000 A T
+  snp386 11 0.002 2000 A T
+  snp389 11 0.002 2000 A T
+  snp390 11 0.002 2000 A T
+  snp393 11 0.002 2000 A T
+  snp395 11 0.002 2000 A T
+  snp397 11 0.002 2000 A T
+  snp400 11 0.002 2000 A T
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/pca/admix.geno	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,303 @@
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+122222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+122222
+222222
+222222
+222222
+222222
+222222
+222222
+222212
+222222
+222222
+222221
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+212222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+122211
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+122222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222022
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+221221
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222122
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+122222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222212
+222222
+222222
+222222
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/pca/coordinates.txt	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,7 @@
+           #eigvals:     3.243     1.103
+                 PB1     0.1887      0.4703  All_Individuals
+                 PB2     0.0398      0.0455  All_Individuals
+                 PB3     0.1647     -0.6945  All_Individuals
+                 PB4    -0.8954     -0.0220  All_Individuals
+                 PB6     0.1887      0.4703  All_Individuals
+                 PB8     0.3135     -0.2696  All_Individuals
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/pca/explained.txt	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,4 @@
+Percentage explained by eigenvectors:
+1: 64.9%
+2: 22.1%
+3: 13.1%
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/pca/par.admix	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,7 @@
+genotypename: /scratch/galaxy/home/oocyte/galaxy_oocyte/database/files/000/dataset_260_files/admix.geno
+snpname: /scratch/galaxy/home/oocyte/galaxy_oocyte/database/files/000/dataset_260_files/admix.snp
+indivname: /scratch/galaxy/home/oocyte/galaxy_oocyte/database/files/000/dataset_260_files/admix.ind
+evecoutname: /scratch/galaxy/home/oocyte/galaxy_oocyte/database/files/000/dataset_260_files/coordinates.txt
+evaloutname: /scratch/galaxy/home/oocyte/galaxy_oocyte/database/files/000/dataset_260_files/admix.eval
+altnormstyle: NO
+numoutevec: 2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/pca/pca.html	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,37 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <meta http-equiv="Content-type" content="text/html; charset=UTF-8" />
+    <link rel="stylesheet" href="/static/style/base.css" type="text/css" />
+    <title>PCA Galaxy Composite Dataset</title>
+  </head>
+  <body>
+    <div class="document">
+      Output completed: 2012-04-03 02:19:05 PM
+      <p/>
+      <div id="gd_outputs">
+        Outputs
+        <ul>
+            <li><a href="PCA.pdf">PCA.pdf</a></li>
+            <li><a href="coordinates.txt">coordinates.txt</a></li>
+            <li><a href="explained.txt">explained.txt</a></li>
+        </ul>
+      </div>
+      <div id="gd_inputs">
+        Inputs
+        <ul>
+            <li><a href="par.admix">par.admix</a></li>
+            <li><a href="admix.geno">admix.geno</a></li>
+            <li><a href="admix.snp">admix.snp</a></li>
+            <li><a href="admix.ind">admix.ind</a></li>
+        </ul>
+      </div>
+      <div id="gd_misc">
+        Stats<p/><pre>
+
+</pre>
+      </div>
+    </div>
+  </body>
+</html>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/phylogenetic_tree/distance_matrix.phylip	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,8 @@
+7
+  canFam2 0.0000 0.3205 0.3085 0.3193 0.3101 0.3138 0.3170
+      PB1 0.3205 0.0000 0.0103 0.0100 0.0130 0.0119 0.0112
+      PB2 0.3085 0.0103 0.0000 0.0033 0.0062 0.0094 0.0062
+      PB3 0.3193 0.0100 0.0033 0.0000 0.0081 0.0091 0.0054
+      PB4 0.3101 0.0130 0.0062 0.0081 0.0000 0.0099 0.0088
+      PB6 0.3138 0.0119 0.0094 0.0091 0.0099 0.0000 0.0079
+      PB8 0.3170 0.0112 0.0062 0.0054 0.0088 0.0079 0.0000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/phylogenetic_tree/informative_snps.txt	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,7 @@
+  canFam2        0      338      339      350      345      342      344
+      PB1      338        0      338      344      338      336      339
+      PB2      339      338        0      345      338      339      338
+      PB3      350      344      345        0      347      342      347
+      PB4      345      338      338      347        0      337      341
+      PB6      342      336      339      342      337        0      343
+      PB8      344      339      338      347      341      343        0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/phylogenetic_tree/mega_distance_matrix.txt	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,27 @@
+#mega
+!Title: Galaxy;
+!Format DataType=Distance DataFormat=LowerLeft NTaxa=7;
+
+[1] #canFam2
+[2] #PB1
+[3] #PB2
+[4] #PB3
+[5] #PB4
+[6] #PB6
+[7] #PB8
+
+
+
+[   1   2   3   4   5   6   7 ]
+[1]
+[2]  0.3205
+[3]  0.3085 0.0103
+[4]  0.3193 0.0100 0.0033
+[5]  0.3101 0.0130 0.0062 0.0081
+[6]  0.3138 0.0119 0.0094 0.0091 0.0099
+[7]  0.3170 0.0112 0.0062 0.0054 0.0088 0.0079
+
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/phylogenetic_tree/phylogenetic_tree.html	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,49 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <meta http-equiv="Content-type" content="text/html; charset=UTF-8" />
+    <link rel="stylesheet" href="/static/style/base.css" type="text/css" />
+    <title>Phylogenetic tree Galaxy Composite Dataset</title>
+  </head>
+  <body>
+    <div class="document">
+      Output completed: 2012-04-03 01:57:44 PM
+      <p/>
+      <div id="gd_outputs">
+        Outputs
+        <ul>
+            <li><a href="tree.pdf">tree.pdf</a></li>
+            <li><a href="phylogenetic_tree.newick">phylogenetic tree (newick)</a></li>
+            <li><a href="distance_matrix.phylip">Phylip distance matrix</a></li>
+            <li><a href="mega_distance_matrix.txt">Mega distance matrix</a></li>
+            <li><a href="informative_snps.txt">informative SNPs</a></li>
+        </ul>
+      </div>
+      <div id="gd_inputs">
+        Inputs
+        <ul>
+            <li>Minimum coverage: 3</li>
+            <li>Minimum quality: 30</li>
+            <li>Include reference sequence: yes</li>
+            <li>Data source: sequence coverage</li>
+            <li>Branch type: square</li>
+            <li>Draw branches to scale: yes</li>
+            <li>Show branch lengths: yes</li>
+            <li>Tree layout: horizontal</li>
+        </ul>
+      </div>
+      <div id="gd_misc">
+        Individuals
+<ol>
+<li>PB1</li>
+<li>PB2</li>
+<li>PB3</li>
+<li>PB4</li>
+<li>PB6</li>
+<li>PB8</li>
+</ol>
+      </div>
+    </div>
+  </body>
+</html>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/phylogenetic_tree/phylogenetic_tree.newick	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,16 @@
+(
+(
+(
+PB4:0.00174,
+canFam2:0.30836)
+:0.00188,
+PB2:0.00042)
+:0.00210,
+(
+PB6:0.00470,
+PB1:0.00720)
+:0.00035,
+(
+PB8:0.00288,
+PB3:0.00252)
+:0.00055);
Binary file genome_diversity/test-data/test_out/phylogenetic_tree/tree.pdf has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/population_structure/graphical.pdf	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,147 @@
+%PDF-1.4
+%���ρ�\r
+1 0 obj
+<<
+/CreationDate (D:20120403142055)
+/ModDate (D:20120403142055)
+/Title (R Graphics Output)
+/Producer (R 2.11.0)
+/Creator (R)
+>>
+endobj
+2 0 obj
+<<
+/Type /Catalog
+/Pages 3 0 R
+>>
+endobj
+5 0 obj
+<<
+/Type /Page
+/Parent 3 0 R
+/Contents 6 0 R
+/Resources 4 0 R
+>>
+endobj
+6 0 obj
+<<
+/Length 7 0 R
+>>
+stream
+1 J 1 j q
+Q q
+1.000 0.000 0.000 rg
+74.40 74.27 54.86 0.00 re f
+0.000 1.000 1.000 rg
+74.40 74.27 54.86 82.69 re f
+1.000 0.000 0.000 rg
+140.23 74.27 54.86 82.69 re f
+0.000 1.000 1.000 rg
+140.23 156.96 54.86 0.00 re f
+1.000 0.000 0.000 rg
+206.06 74.27 54.86 82.69 re f
+0.000 1.000 1.000 rg
+206.06 156.96 54.86 0.00 re f
+1.000 0.000 0.000 rg
+271.89 74.27 54.86 0.00 re f
+0.000 1.000 1.000 rg
+271.89 74.27 54.86 82.69 re f
+1.000 0.000 0.000 rg
+337.71 74.27 54.86 82.69 re f
+0.000 1.000 1.000 rg
+337.71 156.96 54.86 0.00 re f
+1.000 0.000 0.000 rg
+403.54 74.27 54.86 82.69 re f
+0.000 1.000 1.000 rg
+403.54 156.96 54.86 0.00 re f
+BT
+0.000 0.000 0.000 rg
+/F2 1 Tf 12.00 0.00 -0.00 12.00 236.05 18.72 Tm (Individual #) Tj
+ET
+BT
+/F2 1 Tf 0.00 12.00 -12.00 0.00 12.96 91.68 Tm [(Ancestr) -30 (y)] TJ
+ET
+Q q
+0.000 0.000 0.000 RG
+0.75 w
+[] 0 d
+1 J
+1 j
+10.00 M
+59.04 74.27 m 59.04 156.96 l S
+59.04 74.27 m 51.84 74.27 l S
+59.04 90.81 m 51.84 90.81 l S
+59.04 107.34 m 51.84 107.34 l S
+59.04 123.88 m 51.84 123.88 l S
+59.04 140.42 m 51.84 140.42 l S
+59.04 156.96 m 51.84 156.96 l S
+BT
+0.000 0.000 0.000 rg
+/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 65.93 Tm (0.0) Tj
+ET
+BT
+/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 99.00 Tm (0.4) Tj
+ET
+BT
+/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 132.08 Tm (0.8) Tj
+ET
+Q
+endstream
+endobj
+7 0 obj
+1275
+endobj
+3 0 obj
+<<
+/Type /Pages
+/Kids [
+5 0 R
+]
+/Count 1
+/MediaBox [0 0 504 216]
+>>
+endobj
+4 0 obj
+<<
+/ProcSet [/PDF /Text]
+/Font <</F2 9 0 R >>
+/ExtGState << >>
+>>
+endobj
+8 0 obj
+<<
+/Type /Encoding
+/BaseEncoding /WinAnsiEncoding
+/Differences [ 45/minus 96/quoteleft
+144/dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent
+/dieresis /.notdef /ring /cedilla /.notdef /hungarumlaut /ogonek /caron /space]
+>>
+endobj
+9 0 obj <<
+/Type /Font
+/Subtype /Type1
+/Name /F2
+/BaseFont /Helvetica
+/Encoding 8 0 R
+>> endobj
+xref
+0 10
+0000000000 65535 f
+0000000021 00000 n
+0000000164 00000 n
+0000001641 00000 n
+0000001724 00000 n
+0000000213 00000 n
+0000000293 00000 n
+0000001621 00000 n
+0000001805 00000 n
+0000002062 00000 n
+trailer
+<<
+/Size 10
+/Info 1 0 R
+/Root 2 0 R
+>>
+startxref
+2158
+%%EOF
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/population_structure/numeric.txt	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,6 @@
+0.000010 0.999990
+0.999990 0.000010
+0.999990 0.000010
+0.000010 0.999990
+0.999990 0.000010
+0.999990 0.000010
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/population_structure/population_structure.html	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,44 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <meta http-equiv="Content-type" content="text/html; charset=UTF-8" />
+    <link rel="stylesheet" href="/static/style/base.css" type="text/css" />
+    <title>Population structure Galaxy Composite Dataset</title>
+  </head>
+  <body>
+    <div class="document">
+      Output completed: 2012-04-03 02:20:55 PM
+      <p/>
+      <div id="gd_outputs">
+        Outputs
+        <ul>
+            <li><a href="graphical.pdf">graphical.pdf</a></li>
+            <li><a href="numeric.txt">numeric.txt</a></li>
+        </ul>
+      </div>
+      <div id="gd_inputs">
+        Inputs
+        <ul>
+            <li>Number of populations: 2</li>
+        </ul>
+      </div>
+      <div id="gd_misc">
+        Populations
+<ul>
+<li>
+All Individuals
+<ol>
+<li>PB1</li>
+<li>PB2</li>
+<li>PB3</li>
+<li>PB4</li>
+<li>PB6</li>
+<li>PB8</li>
+</ol>
+</li>
+</ul>
+      </div>
+    </div>
+  </body>
+</html>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/prepare_population_structure/admix.map	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,303 @@
+1 snp1 0 2
+1 snp3 0 4
+1 snp4 0 5
+1 snp5 0 6
+1 snp6 0 7
+1 snp7 0 8
+1 snp8 0 9
+1 snp9 0 10
+1 snp10 0 11
+1 snp11 0 12
+1 snp12 0 13
+1 snp13 0 14
+1 snp14 0 15
+1 snp16 0 17
+1 snp17 0 18
+1 snp22 0 23
+1 snp24 0 25
+1 snp25 0 26
+1 snp27 0 28
+1 snp28 0 29
+1 snp29 0 30
+1 snp30 0 31
+1 snp31 0 32
+1 snp33 0 34
+1 snp34 0 35
+1 snp37 0 38
+1 snp38 0 39
+1 snp39 0 40
+1 snp40 0 41
+1 snp41 0 42
+1 snp42 0 43
+1 snp43 0 44
+1 snp45 0 46
+1 snp46 0 47
+1 snp47 0 48
+1 snp48 0 49
+1 snp49 0 50
+1 snp50 0 51
+1 snp51 0 52
+1 snp52 0 53
+1 snp53 0 54
+1 snp54 0 55
+1 snp56 0 57
+1 snp58 0 59
+1 snp59 0 60
+1 snp60 0 61
+1 snp61 0 62
+1 snp62 0 63
+1 snp63 0 64
+1 snp64 0 65
+1 snp65 0 66
+1 snp67 0 68
+1 snp68 0 69
+1 snp70 0 71
+1 snp71 0 72
+1 snp72 0 73
+1 snp73 0 74
+1 snp74 0 75
+1 snp75 0 76
+1 snp76 0 77
+1 snp77 0 78
+1 snp78 0 79
+1 snp80 0 81
+1 snp81 0 82
+1 snp83 0 84
+1 snp84 0 85
+1 snp87 0 88
+1 snp89 0 90
+1 snp90 0 91
+1 snp91 0 92
+1 snp92 0 93
+1 snp93 0 94
+1 snp94 0 95
+1 snp98 0 99
+1 snp100 0 101
+1 snp101 0 102
+1 snp102 0 103
+1 snp103 0 104
+1 snp104 0 105
+1 snp105 0 106
+1 snp106 0 107
+1 snp107 0 108
+1 snp108 0 109
+1 snp110 0 111
+1 snp111 0 112
+1 snp112 0 113
+1 snp113 0 114
+1 snp116 0 117
+1 snp117 0 118
+1 snp118 0 119
+1 snp119 0 120
+1 snp121 0 122
+1 snp122 0 123
+1 snp123 0 124
+1 snp124 0 125
+1 snp125 0 126
+1 snp126 0 127
+1 snp128 0 129
+1 snp129 0 130
+1 snp131 0 132
+1 snp133 0 134
+1 snp134 0 135
+1 snp135 0 136
+1 snp137 0 138
+1 snp138 0 139
+1 snp139 0 140
+1 snp140 0 141
+1 snp141 0 142
+1 snp143 0 144
+1 snp145 0 146
+1 snp146 0 147
+1 snp148 0 149
+1 snp149 0 150
+1 snp150 0 151
+1 snp151 0 152
+1 snp152 0 153
+1 snp153 0 154
+1 snp154 0 155
+1 snp156 0 157
+1 snp157 0 158
+1 snp158 0 159
+1 snp159 0 160
+1 snp160 0 161
+1 snp161 0 162
+1 snp162 0 163
+1 snp164 0 165
+1 snp165 0 166
+1 snp167 0 168
+1 snp168 0 169
+1 snp169 0 170
+1 snp170 0 171
+1 snp171 0 172
+1 snp172 0 173
+1 snp174 0 175
+1 snp175 0 176
+1 snp176 0 177
+1 snp177 0 178
+1 snp178 0 179
+1 snp179 0 180
+1 snp181 0 182
+1 snp182 0 183
+1 snp183 0 184
+1 snp184 0 185
+1 snp185 0 186
+1 snp186 0 187
+1 snp188 0 189
+1 snp191 0 192
+1 snp192 0 193
+1 snp193 0 194
+1 snp195 0 196
+1 snp196 0 197
+1 snp197 0 198
+1 snp199 0 200
+1 snp200 0 201
+1 snp201 0 202
+1 snp202 0 203
+1 snp203 0 204
+1 snp205 0 206
+1 snp207 0 208
+1 snp210 0 211
+1 snp211 0 212
+1 snp212 0 213
+1 snp213 0 214
+1 snp214 0 215
+1 snp215 0 216
+1 snp216 0 217
+1 snp217 0 218
+1 snp218 0 219
+1 snp219 0 220
+1 snp220 0 221
+1 snp221 0 222
+1 snp223 0 224
+1 snp224 0 225
+1 snp225 0 226
+1 snp226 0 227
+1 snp227 0 228
+1 snp228 0 229
+1 snp229 0 230
+1 snp230 0 231
+1 snp231 0 232
+1 snp232 0 233
+1 snp235 0 236
+1 snp236 0 237
+1 snp237 0 238
+1 snp239 0 240
+1 snp240 0 241
+1 snp241 0 242
+1 snp242 0 243
+1 snp243 0 244
+1 snp244 0 245
+1 snp246 0 247
+1 snp247 0 248
+1 snp248 0 249
+1 snp249 0 250
+1 snp250 0 251
+1 snp251 0 252
+1 snp252 0 253
+1 snp253 0 254
+1 snp254 0 255
+1 snp255 0 256
+1 snp256 0 257
+1 snp257 0 258
+1 snp258 0 259
+1 snp260 0 261
+1 snp261 0 262
+1 snp262 0 263
+1 snp263 0 264
+1 snp264 0 265
+1 snp265 0 266
+1 snp266 0 267
+1 snp267 0 268
+1 snp268 0 269
+1 snp269 0 270
+1 snp270 0 271
+1 snp271 0 272
+1 snp273 0 274
+1 snp274 0 275
+1 snp275 0 276
+1 snp276 0 277
+1 snp277 0 278
+1 snp278 0 279
+1 snp281 0 282
+1 snp282 0 283
+1 snp284 0 285
+1 snp287 0 288
+1 snp288 0 289
+1 snp289 0 290
+1 snp290 0 291
+1 snp291 0 292
+1 snp292 0 293
+1 snp293 0 294
+1 snp294 0 295
+1 snp297 0 298
+1 snp298 0 299
+1 snp299 0 300
+1 snp300 0 301
+1 snp301 0 302
+1 snp302 0 303
+1 snp303 0 304
+1 snp304 0 305
+1 snp307 0 308
+1 snp308 0 309
+1 snp309 0 310
+1 snp310 0 311
+1 snp312 0 313
+1 snp313 0 314
+1 snp316 0 317
+1 snp317 0 318
+1 snp320 0 321
+1 snp321 0 322
+1 snp322 0 323
+1 snp323 0 324
+1 snp324 0 325
+1 snp325 0 326
+1 snp328 0 329
+1 snp329 0 330
+1 snp331 0 332
+1 snp332 0 333
+1 snp333 0 334
+1 snp334 0 335
+1 snp335 0 336
+1 snp336 0 337
+1 snp338 0 339
+1 snp339 0 340
+1 snp341 0 342
+1 snp342 0 343
+1 snp344 0 345
+1 snp345 0 346
+1 snp348 0 349
+1 snp350 0 351
+1 snp352 0 353
+1 snp353 0 354
+1 snp354 0 355
+1 snp355 0 356
+1 snp360 0 361
+1 snp361 0 362
+1 snp362 0 363
+1 snp364 0 365
+1 snp366 0 367
+1 snp369 0 370
+1 snp370 0 371
+1 snp371 0 372
+1 snp372 0 373
+1 snp373 0 374
+1 snp374 0 375
+1 snp375 0 376
+1 snp376 0 377
+1 snp377 0 378
+1 snp378 0 379
+1 snp379 0 380
+1 snp380 0 381
+1 snp381 0 382
+1 snp382 0 383
+1 snp383 0 384
+1 snp384 0 385
+1 snp385 0 386
+1 snp386 0 387
+1 snp389 0 390
+1 snp390 0 391
+1 snp393 0 394
+1 snp395 0 396
+1 snp397 0 398
+1 snp400 0 401
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/prepare_population_structure/admix.ped	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,6 @@
+PB1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
+PB2 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
+PB3 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
+PB4 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
+PB6 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1
+PB8 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/prepare_population_structure/prepare_population_structure.html	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,47 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <meta http-equiv="Content-type" content="text/html; charset=UTF-8" />
+    <link rel="stylesheet" href="/static/style/base.css" type="text/css" />
+    <title>Prepare to look for population structure Galaxy Composite Dataset</title>
+  </head>
+  <body>
+    <div class="document">
+      Output completed: 2012-04-03 02:17:44 PM
+      <p/>
+      <div id="gd_outputs">
+        Outputs
+        <ul>
+            <li><a href="admix.ped">admix.ped</a></li>
+            <li><a href="admix.map">admix.map</a></li>
+            <li>Using 303 of 400 SNPs</li>
+        </ul>
+      </div>
+      <div id="gd_inputs">
+        Inputs
+        <ul>
+            <li>Minimum reads covering a SNP, per individual: 3</li>
+            <li>Minimum quality value, per individual: 30</li>
+            <li>Minimum spacing between SNPs on the same scaffold: 0</li>
+        </ul>
+      </div>
+      <div id="gd_misc">
+        Populations
+<ul>
+<li>
+All Individuals
+<ol>
+<li>PB1</li>
+<li>PB2</li>
+<li>PB3</li>
+<li>PB4</li>
+<li>PB6</li>
+<li>PB8</li>
+</ol>
+</li>
+</ul>
+      </div>
+    </div>
+  </body>
+</html>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/rank_pathways/rank_pathways.tabular	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,240 @@
+3	0.25	1	cfa03450=Non-homologous end-joining
+1	0.25	1	cfa00750=Vitamin B6 metabolism
+2	0.2	3	cfa00290=Valine, leucine and isoleucine biosynthesis
+3	0.18	4	cfa00770=Pantothenate and CoA biosynthesis
+5	0.17	5	cfa05310=Asthma
+4	0.16	6	cfa00760=Nicotinate and nicotinamide metabolism
+2	0.12	7	cfa00450=Selenocompound metabolism
+4	0.11	8	cfa05330=Allograft rejection
+5	0.098	9	cfa04672=Intestinal immune network for IgA production
+4	0.098	9	cfa02010=ABC transporters
+2	0.095	11	cfa03430=Mismatch repair
+4	0.089	12	cfa05320=Autoimmune thyroid disease
+4	0.089	12	cfa00280=Valine, leucine and isoleucine degradation
+3	0.088	14	cfa03410=Base excision repair
+3	0.088	14	cfa03030=DNA replication
+3	0.088	14	cfa00565=Ether lipid metabolism
+6	0.087	17	cfa05140=Leishmaniasis
+2	0.087	17	cfa04977=Vitamin digestion and absorption
+1	0.083	19	cfa00740=Riboflavin metabolism
+4	0.08	20	cfa05150=Staphylococcus aureus infection
+2	0.08	20	cfa03060=Protein export
+3	0.079	22	cfa05340=Primary immunodeficiency
+3	0.079	22	cfa05143=African trypanosomiasis
+6	0.078	24	cfa00564=Glycerophospholipid metabolism
+2	0.077	25	cfa00410=beta-Alanine metabolism
+2	0.071	26	cfa05332=Graft-versus-host disease
+5	0.069	27	cfa03320=PPAR signaling pathway
+6	0.066	28	cfa05323=Rheumatoid arthritis
+5	0.063	29	cfa04664=Fc epsilon RI signaling pathway
+3	0.062	30	cfa00561=Glycerolipid metabolism
+2	0.062	30	cfa00350=Tyrosine metabolism
+2	0.062	30	cfa00020=Citrate cycle (TCA cycle)
+2	0.059	33	cfa00260=Glycine, serine and threonine metabolism
+1	0.059	33	cfa04614=Renin-angiotensin system
+1	0.059	33	cfa00360=Phenylalanine metabolism
+9	0.058	36	cfa04145=Phagosome
+3	0.058	36	cfa05213=Endometrial cancer
+4	0.057	38	cfa05416=Viral myocarditis
+2	0.057	38	cfa00500=Starch and sucrose metabolism
+2	0.056	40	cfa04130=SNARE interactions in vesicular transport
+1	0.056	40	cfa00592=alpha-Linolenic acid metabolism
+1	0.053	42	cfa04964=Proximal tubule bicarbonate reclamation
+1	0.053	42	cfa00630=Glyoxylate and dicarboxylate metabolism
+3	0.052	44	cfa04621=NOD-like receptor signaling pathway
+2	0.05	45	cfa05219=Bladder cancer
+2	0.05	45	cfa04940=Type I diabetes mellitus
+2	0.05	45	cfa00380=Tryptophan metabolism
+2	0.047	48	cfa03420=Nucleotide excision repair
+3	0.045	49	cfa04920=Adipocytokine signaling pathway
+3	0.045	49	cfa00970=Aminoacyl-tRNA biosynthesis
+2	0.045	49	cfa00071=Fatty acid metabolism
+1	0.045	49	cfa00591=Linoleic acid metabolism
+1	0.045	49	cfa00340=Histidine metabolism
+4	0.043	54	cfa04972=Pancreatic secretion
+2	0.043	54	cfa03022=Basal transcription factors
+2	0.043	54	cfa00982=Drug metabolism - cytochrome P450
+3	0.042	57	cfa05218=Melanoma
+3	0.042	57	cfa05211=Renal cell carcinoma
+4	0.041	59	cfa05414=Dilated cardiomyopathy
+2	0.04	60	cfa00590=Arachidonic acid metabolism
+1	0.04	60	cfa04320=Dorso-ventral axis formation
+3	0.039	62	cfa04662=B cell receptor signaling pathway
+2	0.039	62	cfa00310=Lysine degradation
+3	0.038	64	cfa04512=ECM-receptor interaction
+2	0.038	64	cfa05144=Malaria
+2	0.038	64	cfa00270=Cysteine and methionine metabolism
+1	0.038	64	cfa03440=Homologous recombination
+1	0.038	64	cfa00052=Galactose metabolism
+8	0.037	69	cfa04810=Regulation of actin cytoskeleton
+4	0.037	69	cfa05146=Amoebiasis
+4	0.037	69	cfa04666=Fc gamma R-mediated phagocytosis
+2	0.037	69	cfa05223=Non-small cell lung cancer
+6	0.036	73	cfa05168=Herpes simplex infection
+6	0.036	73	cfa05152=Tuberculosis
+3	0.036	73	cfa04640=Hematopoietic cell lineage
+7	0.034	76	cfa04510=Focal adhesion
+3	0.034	76	cfa00240=Pyrimidine metabolism
+3	0.033	78	cfa03008=Ribosome biogenesis in eukaryotes
+1	0.033	78	cfa00983=Drug metabolism - other enzymes
+2	0.032	80	cfa04976=Bile secretion
+6	0.031	81	cfa04060=Cytokine-cytokine receptor interaction
+4	0.031	81	cfa04110=Cell cycle
+1	0.031	81	cfa00250=Alanine, aspartate and glutamate metabolism
+4	0.029	84	cfa05145=Toxoplasmosis
+3	0.029	84	cfa04650=Natural killer cell mediated cytotoxicity
+2	0.029	84	cfa05214=Glioma
+4	0.028	87	cfa05162=Measles
+2	0.028	87	cfa05412=Arrhythmogenic right ventricular cardiomyopathy (ARVC)
+7	0.027	89	cfa05166=HTLV-I infection
+4	0.027	89	cfa05322=Systemic lupus erythematosus
+2	0.027	89	cfa05212=Pancreatic cancer
+2	0.026	92	cfa04146=Peroxisome
+2	0.026	92	cfa04070=Phosphatidylinositol signaling system
+1	0.026	92	cfa04978=Mineral absorption
+2	0.025	95	cfa05133=Pertussis
+2	0.025	95	cfa04612=Antigen processing and presentation
+2	0.025	95	cfa04350=TGF-beta signaling pathway
+1	0.025	95	cfa00830=Retinol metabolism
+3	0.024	99	cfa04514=Cell adhesion molecules (CAMs)
+2	0.024	99	cfa05410=Hypertrophic cardiomyopathy (HCM)
+2	0.024	99	cfa04012=ErbB signaling pathway
+1	0.024	99	cfa00980=Metabolism of xenobiotics by cytochrome P450
+1	0.024	99	cfa00640=Propanoate metabolism
+3	0.023	104	cfa04360=Axon guidance
+2	0.023	104	cfa04620=Toll-like receptor signaling pathway
+1	0.023	104	cfa04975=Fat digestion and absorption
+1	0.023	104	cfa04330=Notch signaling pathway
+7	0.022	108	cfa05200=Pathways in cancer
+3	0.022	108	cfa04910=Insulin signaling pathway
+2	0.022	108	cfa05215=Prostate cancer
+1	0.022	108	cfa03460=Fanconi anemia pathway
+24	0.021	112	cfa01100=Metabolic pathways
+3	0.021	112	cfa04630=Jak-STAT signaling pathway
+1	0.021	112	cfa00480=Glutathione metabolism
+3	0.020	115	cfa00230=Purine metabolism
+2	0.020	115	cfa04540=Gap junction
+1	0.02	115	cfa00620=Pyruvate metabolism
+2	0.019	118	cfa04912=GnRH signaling pathway
+2	0.018	119	cfa05142=Chagas disease (American trypanosomiasis)
+2	0.018	119	cfa04380=Osteoclast differentiation
+1	0.018	119	cfa05221=Acute myeloid leukemia
+1	0.018	119	cfa00330=Arginine and proline metabolism
+3	0.017	123	cfa05164=Influenza A
+2	0.017	123	cfa04270=Vascular smooth muscle contraction
+2	0.017	123	cfa04114=Oocyte meiosis
+3	0.016	126	cfa04141=Protein processing in endoplasmic reticulum
+3	0.016	126	cfa04020=Calcium signaling pathway
+2	0.016	126	cfa05160=Hepatitis C
+2	0.016	126	cfa04670=Leukocyte transendothelial migration
+1	0.016	126	cfa05210=Colorectal cancer
+1	0.016	126	cfa04610=Complement and coagulation cascades
+1	0.016	126	cfa04150=mTOR signaling pathway
+4	0.015	133	cfa04010=MAPK signaling pathway
+1	0.015	133	cfa04974=Protein digestion and absorption
+1	0.015	133	cfa04730=Long-term depression
+1	0.015	133	cfa04115=p53 signaling pathway
+1	0.014	137	cfa05220=Chronic myeloid leukemia
+1	0.014	137	cfa04971=Gastric acid secretion
+1	0.014	137	cfa04720=Long-term potentiation
+1	0.014	137	cfa04370=VEGF signaling pathway
+1	0.014	137	cfa04260=Cardiac muscle contraction
+1	0.014	137	cfa03018=RNA degradation
+2	0.013	143	cfa00010=Glycolysis / Gluconeogenesis
+1	0.013	143	cfa04970=Salivary secretion
+1	0.013	143	cfa04520=Adherens junction
+2	0.012	146	cfa04062=Chemokine signaling pathway
+1	0.012	146	cfa05134=Legionellosis
+1	0.012	146	cfa05132=Salmonella infection
+1	0.012	146	cfa04727=GABAergic synapse
+1	0.012	146	cfa04210=Apoptosis
+1	0.011	151	cfa03015=mRNA surveillance pathway
+1	0.010	152	cfa04914=Progesterone-mediated oocyte maturation
+1	0.0098	153	cfa04916=Melanogenesis
+2	0.0095	154	cfa04144=Endocytosis
+1	0.0087	155	cfa04142=Lysosome
+1	0.0086	156	cfa04660=T cell receptor signaling pathway
+1	0.0082	157	cfa04724=Glutamatergic synapse
+2	0.0081	158	cfa04080=Neuroactive ligand-receptor interaction
+1	0.0079	159	cfa04728=Dopaminergic synapse
+2	0.0074	160	cfa05010=Alzheimer's disease
+1	0.0074	160	cfa04722=Neurotrophin signaling pathway
+1	0.0074	160	cfa04120=Ubiquitin mediated proteolysis
+1	0.0068	163	cfa00190=Oxidative phosphorylation
+1	0.0067	164	cfa05012=Parkinson's disease
+1	0.0057	165	cfa03013=RNA transport
+1	0.0056	166	cfa03040=Spliceosome
+1	0.0049	167	cfa05016=Huntington's disease
+1	0.0023	168	cfa04740=Olfactory transduction
+0	0	169	cfa05222=Small cell lung cancer
+0	0	169	cfa05217=Basal cell carcinoma
+0	0	169	cfa05216=Thyroid cancer
+0	0	169	cfa05100=Bacterial invasion of epithelial cells
+0	0	169	cfa05020=Prion diseases
+0	0	169	cfa05014=Amyotrophic lateral sclerosis (ALS)
+0	0	169	cfa04973=Carbohydrate digestion and absorption
+0	0	169	cfa04966=Collecting duct acid secretion
+0	0	169	cfa04962=Vasopressin-regulated water reabsorption
+0	0	169	cfa04961=Endocrine and other factor-regulated calcium reabsorption
+0	0	169	cfa04960=Aldosterone-regulated sodium reabsorption
+0	0	169	cfa04950=Maturity onset diabetes of the young
+0	0	169	cfa04930=Type II diabetes mellitus
+0	0	169	cfa04744=Phototransduction
+0	0	169	cfa04742=Taste transduction
+0	0	169	cfa04725=Cholinergic synapse
+0	0	169	cfa04721=Synaptic vesicle cycle
+0	0	169	cfa04710=Circadian rhythm - mammal
+0	0	169	cfa04623=Cytosolic DNA-sensing pathway
+0	0	169	cfa04622=RIG-I-like receptor signaling pathway
+0	0	169	cfa04530=Tight junction
+0	0	169	cfa04340=Hedgehog signaling pathway
+0	0	169	cfa04310=Wnt signaling pathway
+0	0	169	cfa04140=Regulation of autophagy
+0	0	169	cfa04122=Sulfur relay system
+0	0	169	cfa03050=Proteasome
+0	0	169	cfa03020=RNA polymerase
+0	0	169	cfa03010=Ribosome
+0	0	169	cfa01040=Biosynthesis of unsaturated fatty acids
+0	0	169	cfa00920=Sulfur metabolism
+0	0	169	cfa00910=Nitrogen metabolism
+0	0	169	cfa00900=Terpenoid backbone biosynthesis
+0	0	169	cfa00860=Porphyrin and chlorophyll metabolism
+0	0	169	cfa00790=Folate biosynthesis
+0	0	169	cfa00785=Lipoic acid metabolism
+0	0	169	cfa00780=Biotin metabolism
+0	0	169	cfa00730=Thiamine metabolism
+0	0	169	cfa00670=One carbon pool by folate
+0	0	169	cfa00650=Butanoate metabolism
+0	0	169	cfa00604=Glycosphingolipid biosynthesis - ganglio series
+0	0	169	cfa00603=Glycosphingolipid biosynthesis - globo series
+0	0	169	cfa00601=Glycosphingolipid biosynthesis - lacto and neolacto series
+0	0	169	cfa00600=Sphingolipid metabolism
+0	0	169	cfa00563=Glycosylphosphatidylinositol(GPI)-anchor biosynthesis
+0	0	169	cfa00562=Inositol phosphate metabolism
+0	0	169	cfa00534=Glycosaminoglycan biosynthesis - heparan sulfate
+0	0	169	cfa00533=Glycosaminoglycan biosynthesis - keratan sulfate
+0	0	169	cfa00532=Glycosaminoglycan biosynthesis - chondroitin sulfate
+0	0	169	cfa00531=Glycosaminoglycan degradation
+0	0	169	cfa00520=Amino sugar and nucleotide sugar metabolism
+0	0	169	cfa00514=Other types of O-glycan biosynthesis
+0	0	169	cfa00512=Mucin type O-Glycan biosynthesis
+0	0	169	cfa00511=Other glycan degradation
+0	0	169	cfa00510=N-Glycan biosynthesis
+0	0	169	cfa00472=D-Arginine and D-ornithine metabolism
+0	0	169	cfa00471=D-Glutamine and D-glutamate metabolism
+0	0	169	cfa00460=Cyanoamino acid metabolism
+0	0	169	cfa00430=Taurine and hypotaurine metabolism
+0	0	169	cfa00400=Phenylalanine, tyrosine and tryptophan biosynthesis
+0	0	169	cfa00300=Lysine biosynthesis
+0	0	169	cfa00232=Caffeine metabolism
+0	0	169	cfa00140=Steroid hormone biosynthesis
+0	0	169	cfa00130=Ubiquinone and other terpenoid-quinone biosynthesis
+0	0	169	cfa00120=Primary bile acid biosynthesis
+0	0	169	cfa00100=Steroid biosynthesis
+0	0	169	cfa00072=Synthesis and degradation of ketone bodies
+0	0	169	cfa00062=Fatty acid elongation in mitochondria
+0	0	169	cfa00061=Fatty acid biosynthesis
+0	0	169	cfa00053=Ascorbate and aldarate metabolism
+0	0	169	cfa00051=Fructose and mannose metabolism
+0	0	169	cfa00040=Pentose and glucuronate interconversions
+0	0	169	cfa00030=Pentose phosphate pathway
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/select_snps/select_snps.gd_snp	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,102 @@
+#{"column_names":["scaf","pos","A","B","qual","ref","rpos","rnuc","1A","1B","1G","1Q","2A","2B","2G","2Q","3A","3B","3G","3Q","4A","4B","4G","4Q","5A","5B","5G","5Q","6A","6B","6G","6Q","pair","dist",
+#"prim","rflp"],"dbkey":"canFam2","individuals":[["PB1",9],["PB2",13],["PB3",17],["PB4",21],["PB6",25],["PB8",29]],"pos":2,"rPos":7,"ref":6,"scaffold":1,"species":"bear"}
+Contig161_chr1_4641264_4641879	115	C	T	73.5	chr1	4641382	C	6	0	2	45	8	0	2	51	15	0	2	72	5	0	2	42	6	0	2	45	10	0	2	57	Y	54	0.323	0
+Contig86_chr1_30984450_30985684	670	C	T	365.0	chr1	30985133	C	9	0	2	54	10	0	2	57	13	0	2	66	3	0	2	36	9	0	2	54	7	0	2	48	Y	145	0.031	0
+Contig21_chr1_60697952_60699446	307	G	A	51.9	chr1	60698265	G	12	0	2	63	9	0	2	54	4	0	2	39	6	0	2	45	9	0	2	54	4	0	2	39	Y	98	0.507	0
+Contig64_chr1_87343284_87345672	163	T	A	3.76	chr1	87343443	C	0	2	2	1	0	0	-1	0	5	0	2	42	2	0	2	33	0	1	2	14	0	0	-1	0	N	3	0.039	2
+Contig20_chr1_110679280_110679687	181	C	T	87.4	chr1	110679454	-	1	0	2	30	7	0	2	48	4	0	2	39	2	0	2	33	2	0	2	33	0	0	-1	0	N	31	0.660	2
+Contig222_chr2_9817738_9818143	220	C	T	888.0	chr2	9817960	C	17	0	2	78	12	0	2	63	20	0	2	87	8	0	2	51	11	0	2	60	12	0	2	63	Y	76	0.093	1
+Contig47_chr2_25470778_25471576	126	G	A	888.0	chr2	25470896	G	12	0	2	63	14	0	2	69	14	0	2	69	10	0	2	57	18	0	2	81	13	0	2	66	N	11	0.289	1
+Contig6_chr2_56859179_56859956	671	T	C	999.9	chr2	56859851	T	15	0	2	72	18	0	2	81	20	0	2	90	19	0	2	84	19	0	2	84	24	0	2	99	N	28	5.308	1
+Contig163_chr2_76402959_76404830	221	C	T	127.0	chr2	76403181	C	4	0	2	42	10	0	2	57	9	0	2	54	11	0	2	60	7	0	2	48	9	0	2	54	Y	54	0.178	1
+Contig56_chr3_17326225_17327548	387	G	C	91.2	chr3	17326591	G	14	0	2	69	13	0	2	66	15	0	2	72	15	0	2	72	13	0	2	66	12	0	2	63	Y	20	0.225	3
+Contig108_chr3_46210055_46210874	367	A	G	21.0	chr3	46210423	A	19	0	2	84	10	0	2	57	16	0	2	75	14	0	2	69	20	0	2	87	11	0	2	60	N	236	0.028	1
+Contig1_chr3_51588422_51589409	926	A	G	51.0	chr3	51589353	G	2	0	2	33	2	0	2	33	6	0	2	45	4	0	2	39	9	0	2	54	11	0	2	60	N	21	1.147	0
+Contig65_chr3_80727952_80728283	39	T	C	71.2	chr3	80727990	T	7	0	2	48	3	0	2	36	8	0	2	51	6	0	2	45	8	0	2	51	11	0	2	60	N	22	7.078	0
+Contig134_chr4_12145648_12148225	1326	C	T	164.0	chr4	12146961	C	9	0	2	54	8	0	2	51	7	0	2	48	3	0	2	36	5	0	2	42	5	0	2	42	Y	4	0.080	1
+Contig19_chr4_26233601_26233991	146	G	C	51.6	chr4	26233744	G	10	0	2	57	8	0	2	51	9	0	2	54	5	0	2	42	9	0	2	54	4	0	2	39	N	41	0.163	3
+Contig17_chr4_61310346_61311158	267	C	T	49.9	chr4	61310604	T	10	0	2	57	7	0	2	48	9	0	2	54	10	0	2	57	14	0	2	69	7	0	2	48	Y	219	0.098	0
+Contig31_chr5_4734956_4736547	1166	C	T	133.0	chr5	4736132	C	14	0	2	69	8	0	2	51	17	0	2	78	4	0	2	39	9	0	2	54	12	0	2	63	Y	1	0.021	0
+Contig6_chr5_26899813_26900498	97	A	C	88.6	chr5	26899910	A	15	0	2	72	14	0	2	69	27	0	2	108	15	0	2	72	13	0	2	69	12	0	2	63	Y	92	7.370	3
+Contig45_chr5_50892738_50892968	169	C	A	25.8	chr5	50892911	C	10	0	2	57	7	0	2	48	10	0	2	60	6	0	2	45	6	0	2	45	13	0	2	66	N	244	0.497	1
+Contig45_chr5_76133561_76134403	388	A	G	103.0	chr5	76133941	G	3	0	2	36	8	0	2	51	8	0	2	51	5	0	2	42	6	0	2	45	7	0	2	48	Y	57	0.038	0
+Contig111_chr6_5821219_5822519	1060	A	G	68.1	chr6	5822321	T	7	0	2	48	6	0	2	45	11	0	2	60	9	0	2	54	3	0	2	36	12	0	2	63	Y	7	0.231	1
+Contig102_chr6_30271329_30271577	39	T	G	139.0	chr6	30271371	G	3	0	2	36	4	0	2	39	6	0	2	45	1	0	2	30	4	0	2	39	4	0	2	39	N	15	1.159	0
+Contig112_chr6_51024554_51024851	100	A	G	121.0	chr6	51024654	A	10	0	2	57	12	0	2	63	9	0	2	54	13	0	2	66	14	0	2	69	17	0	2	78	N	75	4.287	0
+Contig84_chr7_6648683_6650255	1297	G	A	110.0	chr7	6649988	G	18	0	2	81	9	0	2	54	22	0	2	77	16	0	2	75	20	0	2	87	6	0	2	45	Y	83	0.166	0
+Contig206_chr7_26281823_26282074	103	C	A	101.0	chr7	26281925	T	11	0	2	60	16	0	2	61	19	0	2	84	6	0	2	45	19	0	2	84	16	0	2	75	N	-1	0.947	1
+Contig38_chr7_50681997_50682600	42	T	C	92.4	chr7	50682037	G	6	0	2	45	2	0	2	33	10	0	2	57	12	0	2	63	5	0	2	42	6	0	2	45	Y	94	0.146	0
+Contig91_chr8_12804505_12805470	409	C	A	111.0	chr8	12804906	C	8	0	2	51	10	0	2	57	15	0	2	72	12	0	2	63	14	0	2	69	15	0	2	72	N	145	0.175	0
+Contig8_chr8_27811135_27812620	333	C	T	37.9	chr8	27811458	C	4	0	2	39	11	0	2	60	18	0	2	81	5	0	2	42	6	0	2	45	5	0	2	42	Y	1	0.272	0
+Contig17_chr8_57490059_57490498	69	G	T	97.4	chr8	57490127	A	2	0	2	33	11	0	2	60	15	0	2	72	16	0	2	75	8	0	2	51	10	0	2	57	N	40	0.522	5
+Contig73_chr9_29451535_29452248	616	A	G	24.7	chr9	29452127	G	4	0	2	39	7	0	2	48	1	0	2	30	4	0	2	39	7	0	2	48	6	0	2	45	N	49	0.448	4
+Contig96_chr9_39008495_39009278	215	A	C	98.7	chr9	39008708	C	7	0	2	48	13	0	2	66	28	0	2	111	16	0	2	75	17	0	2	78	17	0	2	78	Y	8	0.427	1
+Contig22_chr10_15505382_15505589	172	T	C	38.5	chr10	15505548	T	2	0	2	33	6	0	2	45	8	0	2	51	8	0	2	51	9	0	2	54	12	0	2	63	N	284	2.861	0
+Contig69_chr10_40547265_40548153	371	G	A	58.1	chr10	40547649	A	9	0	2	54	8	0	2	51	8	0	2	51	9	0	2	54	4	0	2	39	5	0	2	42	Y	20	0.138	4
+Contig9_chr10_51475063_51476054	770	C	T	57.3	chr10	51475839	C	6	0	2	45	16	0	2	75	16	0	2	75	13	0	2	66	9	0	2	54	9	2	2	21	N	80	0.394	0
+Contig72_chr11_7142765_7143772	146	G	A	152.0	chr11	7142911	A	8	0	2	51	8	0	2	51	24	0	2	99	10	0	2	57	17	0	2	78	11	0	2	60	Y	90	1.137	0
+Contig7_chr11_40017076_40017630	352	C	T	46.3	chr11	40017422	C	7	0	2	48	9	0	2	54	6	0	2	45	8	0	2	51	16	0	2	75	9	0	2	54	Y	44	0.336	0
+Contig16_chr11_53408448_53408790	187	A	G	153.0	chr11	53408638	A	7	0	2	48	9	0	2	54	18	0	2	81	10	0	2	57	11	0	2	60	12	0	2	63	Y	116	1.367	0
+Contig21_chr12_18403415_18404381	586	G	T	34.5	chr12	18403983	-	13	0	2	66	16	0	2	75	25	0	2	102	12	0	2	63	12	0	2	63	14	0	2	69	Y	12	0.068	0
+Contig41_chr12_25565452_25566993	475	G	T	6.29	chr12	25565926	G	15	0	2	72	14	0	2	69	10	0	2	57	15	0	2	72	18	0	2	81	19	0	2	84	N	10	2.231	1
+Contig5_chr12_53880670_53882675	1221	A	C	99.4	chr12	53881888	A	16	0	2	75	18	0	2	81	23	0	2	96	10	0	2	57	15	0	2	72	17	0	2	78	Y	31	0.061	0
+Contig107_chr13_26045881_26046290	341	C	G	81.4	chr13	26046230	C	16	0	2	75	20	0	2	90	14	0	2	69	15	0	2	72	9	0	2	54	9	0	2	54	Y	51	4.510	0
+Contig251_chr13_28498333_28501066	864	T	G	296.0	chr13	28499180	T	3	0	2	36	5	0	2	42	4	0	2	39	2	0	2	33	5	0	2	42	6	0	2	45	Y	9	0.068	0
+Contig55_chr13_53467708_53468101	221	T	G	132.0	chr13	53467925	T	25	0	2	102	12	0	2	63	26	0	2	105	7	0	2	48	16	0	2	75	16	0	2	75	N	20	5.717	1
+Contig48_chr14_11839435_11843272	3014	A	G	163.0	chr14	11842446	A	10	0	2	57	8	0	2	51	13	0	2	66	10	0	2	57	5	0	2	42	10	0	2	57	Y	31	0.908	0
+Contig28_chr14_26905747_26909514	975	G	C	3.13	chr14	26906723	G	16	0	2	75	10	0	2	57	12	0	2	63	15	0	2	72	10	0	2	57	7	0	2	48	N	287	0.117	2
+Contig64_chr14_56768376_56768902	473	C	T	29.0	chr14	56768832	C	15	0	2	72	11	0	2	60	14	0	2	69	14	0	2	69	7	0	2	48	9	0	2	54	Y	91	8.281	0
+Contig60_chr15_18493036_18494316	150	G	A	92.6	chr15	18493188	G	9	0	2	54	13	0	2	66	9	0	2	54	6	0	2	45	5	0	2	42	12	0	2	63	Y	45	0.125	0
+Contig112_chr15_26772864_26773267	374	C	T	21.6	chr15	26773244	C	4	0	2	39	4	0	2	39	5	0	2	42	2	0	2	33	4	0	2	39	3	0	2	36	N	18	+99.	0
+Contig119_chr16_6160274_6160477	180	G	A	54.8	chr16	6160457	G	7	0	2	48	6	0	2	45	12	0	2	63	3	0	2	36	11	0	2	60	10	0	2	57	N	42	+99.	0
+Contig60_chr16_28079136_28080263	588	T	G	157.0	chr16	28079739	T	22	0	2	93	20	0	2	87	22	0	2	93	17	0	2	78	12	0	2	63	10	0	2	57	Y	105	5.999	1
+Contig31_chr17_12128267_12129637	205	G	A	90.5	chr17	12128484	G	7	0	2	48	6	0	2	45	6	0	2	45	11	0	2	60	7	0	2	48	4	0	2	39	Y	10	0.246	0
+Contig99_chr17_26021506_26022200	505	C	T	88.8	chr17	26022017	T	15	0	2	72	13	0	2	66	19	0	2	84	9	0	2	54	10	0	2	57	11	0	2	60	Y	1	0.172	1
+Contig27_chr17_61713766_61716585	1056	G	C	40.0	chr17	61714821	G	4	0	2	39	8	0	2	51	10	0	2	57	6	0	2	45	6	0	2	45	3	0	2	36	N	6	2.200	4
+Contig229_chr18_3706523_3708577	1076	A	G	83.9	chr18	3707630	A	11	0	2	60	13	0	2	66	26	0	2	105	11	0	2	60	15	0	2	72	17	0	2	78	Y	63	0.445	0
+Contig82_chr18_27305489_27306229	566	C	T	49.5	chr18	27306051	A	6	0	2	45	6	0	2	45	10	0	2	57	11	0	2	60	6	0	2	45	7	0	2	48	N	1	0.349	0
+Contig64_chr18_55979770_55980315	49	G	A	89.1	chr18	55979824	G	3	0	2	36	9	0	2	54	7	0	2	51	4	0	2	39	3	0	2	36	3	0	2	36	Y	-1	2.124	0
+Contig146_chr19_5221790_5223013	143	A	G	114.0	chr19	5221916	-	1	0	2	30	4	0	2	39	3	0	2	36	5	0	2	42	2	0	2	33	5	0	2	42	Y	12	0.870	0
+Contig129_chr19_25541958_25542221	202	T	C	68.1	chr19	25542154	C	11	0	2	60	19	0	2	84	10	0	2	60	17	0	2	78	9	0	2	54	12	0	2	63	N	-1	2.551	1
+Contig60_chr19_54013816_54014398	281	A	G	138.0	chr19	54014103	C	6	0	2	45	15	0	2	72	7	0	2	48	10	0	2	57	15	0	2	72	10	0	2	57	Y	188	1.271	0
+Contig50_chr20_12138509_12141975	3206	C	A	248.0	chr20	12141763	C	8	0	2	51	15	0	2	72	14	0	2	69	6	0	2	45	10	0	2	57	7	0	2	48	Y	2	0.384	0
+Contig36_chr20_32631363_32632049	176	G	A	24.1	chr20	32631526	G	7	0	2	48	14	0	2	69	19	0	2	84	14	0	2	69	15	0	2	72	16	0	2	75	N	50	1.150	0
+Contig50_chr21_4178523_4178687	121	G	A	362.0	chr21	4178640	G	8	0	2	51	14	0	2	69	5	0	2	42	3	0	2	36	11	0	2	60	4	0	2	39	N	392	0.483	0
+Contig129_chr21_31045749_31046924	381	A	G	129.0	chr21	31046141	A	19	0	2	84	8	0	2	51	23	0	2	96	12	0	2	63	15	0	2	72	18	0	2	81	Y	69	0.028	2
+Contig159_chr22_7896450_7896974	109	G	C	151.0	chr22	7896570	G	16	0	2	75	5	7	1	62	14	0	2	69	16	0	2	75	13	0	2	66	13	0	2	66	Y	16	0.465	0
+Contig23_chr22_34612023_34612568	167	C	G	92.3	chr22	34612181	C	11	0	2	60	18	0	2	81	13	0	2	66	8	0	2	51	12	0	2	63	14	0	2	69	Y	7	0.409	0
+Contig26_chr22_57817664_57819633	1453	A	G	150.0	chr22	57819121	G	9	0	2	54	9	0	2	54	13	0	2	66	15	0	2	72	11	0	2	60	14	0	2	69	N	15	0.471	1
+Contig133_chr23_3525134_3526502	1223	A	G	201.0	chr23	3526387	A	11	0	2	60	13	0	2	66	23	0	2	96	21	0	2	90	13	0	2	66	10	0	2	57	Y	61	1.359	0
+Contig35_chr23_28447813_28449115	70	T	A	21.3	chr23	28447881	T	9	0	2	54	8	0	2	51	10	0	2	57	9	0	2	54	10	0	2	57	12	0	2	63	N	251	0.163	1
+Contig50_chr24_22515247_22516072	761	C	T	243.0	chr24	22515981	T	11	0	2	60	10	0	2	57	8	0	2	51	9	0	2	54	18	0	2	81	8	0	2	51	Y	1	0.190	0
+Contig84_chr24_29196623_29199644	466	C	T	126.0	chr24	29197091	T	7	0	2	48	11	0	2	60	8	0	2	51	7	0	2	48	11	0	2	60	15	0	2	72	Y	42	0.215	0
+Contig144_chr25_4011170_4013134	541	A	G	160.0	chr25	4011690	A	12	0	2	63	17	0	2	78	13	0	2	66	13	0	2	66	13	0	2	66	13	0	2	66	Y	5	0.087	0
+Contig103_chr25_38891221_38892140	407	G	A	131.0	chr25	38891644	G	8	0	2	51	14	0	2	69	18	0	2	81	8	0	2	51	8	0	2	51	11	0	2	60	Y	149	0.167	4
+Contig204_chr26_4311195_4311778	170	C	T	16.9	chr26	4311363	T	20	0	2	87	8	0	2	51	13	0	2	66	18	0	2	81	11	0	2	60	14	0	2	69	N	35	0.085	0
+Contig146_chr26_26622638_26623906	574	G	A	186.0	chr26	26623219	A	11	0	2	60	12	0	2	63	9	0	2	54	11	0	2	60	9	0	2	54	12	0	2	63	Y	1	0.318	0
+Contig135_chr27_6853874_6854079	158	C	T	116.0	chr27	6854032	T	18	0	2	81	19	0	2	84	13	0	2	66	7	0	2	48	8	0	2	51	11	0	2	60	N	4	0.060	1
+Contig64_chr27_34654435_34654621	132	C	A	115.0	chr27	34654567	T	2	0	2	33	2	0	2	33	5	0	2	42	3	0	2	36	3	0	2	36	8	0	2	51	N	12	0.297	1
+Contig131_chr28_6481806_6483783	138	C	T	36.2	chr28	6481953	C	12	0	2	63	12	0	2	63	20	0	2	87	11	0	2	60	10	0	2	57	12	0	2	63	Y	10	0.387	0
+Contig60_chr28_30197166_30197364	92	T	C	164.0	chr28	30197258	T	10	0	2	57	13	0	2	66	15	0	2	72	16	0	2	75	12	0	2	63	11	0	2	60	N	369	1.139	0
+Contig29_chr29_4726399_4727143	559	A	T	163.0	chr29	4726955	A	15	0	2	72	18	0	2	81	18	0	2	81	16	0	2	75	11	0	2	60	14	0	2	72	Y	161	3.114	0
+Contig1_chr30_5992217_5993068	106	C	T	129.0	chr30	5992319	C	10	0	2	57	11	0	2	60	7	0	2	48	11	0	2	60	10	0	2	57	12	0	2	63	Y	76	1.079	0
+Contig165_chr30_25804389_25804926	190	T	C	126.0	chr30	25804592	C	3	0	2	36	8	0	2	51	7	0	2	48	10	0	2	57	7	0	2	48	4	0	2	39	Y	113	0.329	0
+Contig38_chr31_5164423_5166573	2074	C	T	134.0	chr31	5166501	T	13	0	2	66	10	0	2	57	17	0	2	78	11	0	2	60	17	0	2	78	10	0	2	57	Y	58	+99.	0
+Contig17_chr31_26433828_26434459	498	T	C	9.79	chr31	26434322	T	18	0	2	81	10	0	2	57	15	0	2	72	13	0	2	66	16	0	2	75	15	0	2	72	Y	137	4.814	0
+Contig9_chr32_19479532_19479735	12	A	G	20.7	chr32	19479544	A	1	0	2	30	2	0	2	33	1	0	2	30	5	0	2	42	3	0	2	36	3	0	2	36	N	17	+99.	0
+Contig30_chr32_25902721_25905783	208	C	G	162.0	chr32	25902927	G	11	0	2	60	13	0	2	66	11	0	2	60	12	0	2	63	7	0	2	48	11	0	2	60	Y	145	0.322	2
+Contig18_chr33_22207246_22209159	1363	G	T	51.5	chr33	22208619	-	16	0	2	75	8	0	2	51	11	0	2	60	10	0	2	57	15	0	2	72	12	0	2	63	Y	59	2.560	0
+Contig170_chr33_26189421_26189940	292	T	C	98.4	chr33	26189703	T	21	0	2	90	13	0	2	66	15	0	2	72	13	0	2	66	19	0	2	84	13	0	2	66	Y	23	0.307	0
+Contig113_chr34_13341080_13341643	236	C	T	90.7	chr34	13341316	C	4	0	2	39	2	0	2	33	8	0	2	51	4	0	2	39	8	0	2	51	3	0	2	36	Y	47	0.412	3
+Contig152_chr34_31794848_31795540	242	G	A	93.2	chr34	31795093	G	11	0	2	60	24	0	2	99	17	0	2	78	15	0	2	72	18	0	2	81	17	0	2	78	Y	123	2.780	0
+Contig47_chr35_3666773_3667898	348	G	T	124.0	chr35	3667121	G	9	0	2	54	20	0	2	87	18	0	2	81	15	0	2	72	12	0	2	63	14	0	2	69	Y	285	0.235	0
+Contig74_chr35_25394343_25394813	303	A	T	221.0	chr35	25394646	G	23	0	2	96	15	0	2	72	25	0	2	105	7	7	1	49	18	0	2	81	16	0	2	75	Y	58	4.298	0
+Contig5_chr36_4562983_4563634	343	C	T	151.0	chr36	4563324	T	20	0	2	87	20	0	2	87	23	0	2	96	24	0	2	99	9	0	2	54	8	0	2	51	Y	40	1.169	0
+Contig133_chr36_32954045_32955409	136	A	G	116.0	chr36	32954182	A	16	0	2	75	15	0	2	72	20	0	2	87	11	0	2	60	18	0	2	81	13	0	2	66	Y	74	3.772	1
+Contig53_chr37_6665763_6665919	116	C	T	111.0	chr37	6665875	C	9	0	2	54	9	0	2	54	5	0	2	42	9	0	2	54	8	0	2	51	10	0	2	57	N	15	10.875	1
+Contig2_chr37_31197993_31198256	182	C	T	39.6	chr37	31198171	T	6	0	2	45	10	0	2	57	7	0	2	48	9	0	2	54	10	0	2	57	12	0	2	63	N	2	0.595	0
+Contig7_chr38_12217200_12218387	1163	A	T	44.4	chr38	12218353	A	11	0	2	60	13	0	2	66	17	0	2	78	10	0	2	57	11	0	2	60	11	0	2	60	Y	67	+99.	0
+Contig265_chrX_2689247_2689484	114	C	G	103.0	chrX	2689356	C	11	0	2	60	9	0	2	54	13	0	2	66	16	0	2	75	14	0	2	69	10	0	2	57	N	2	9.232	1
+Contig113_chrX_26287829_26288398	385	C	T	59.6	chrX	26288213	C	9	0	2	54	9	0	2	54	17	0	2	78	11	0	2	60	3	8	1	44	4	0	2	39	N	13	0.077	0
+Contig90_chrX_57430715_57431566	548	C	T	116.0	chrX	57431266	T	9	0	2	54	18	0	2	81	13	0	2	66	14	0	2	69	8	0	2	54	7	0	2	48	Y	261	0.154	1
+Contig133_chrX_84833782_84834125	182	G	A	69.7	chrX	84833962	G	5	0	2	42	18	0	2	81	12	0	2	63	19	0	2	84	6	3	1	27	7	0	2	48	N	619	0.278	0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/test-data/test_out/specify_restriction_enzymes/specify_restriction_enzymes.gd_snp	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,10 @@
+#{"column_names":["scaf","pos","A","B","qual","ref","rpos","rnuc","1A","1B","1G","1Q","2A","2B","2G","2Q","3A","3B","3G","3Q","4A","4B","4G","4Q","5A","5B","5G","5Q","6A","6B","6G","6Q","pair","dist",
+#"prim","rflp"],"dbkey":"canFam2","individuals":[["PB1",9],["PB2",13],["PB3",17],["PB4",21],["PB6",25],["PB8",29]],"pos":2,"rPos":7,"ref":6,"scaffold":1,"species":"bear"}
+Contig47_chr2_25470778_25471576	126	G	A	888.0	chr2	25470896	G	12	0	2	63	14	0	2	69	14	0	2	69	10	0	2	57	18	0	2	81	13	0	2	66	N	11	0.289	1
+Contig73_chr9_29451535_29452248	616	A	G	24.7	chr9	29452127	G	4	0	2	39	7	0	2	48	1	0	2	30	4	0	2	39	7	0	2	48	6	0	2	45	N	49	0.448	4
+Contig69_chr10_40547265_40548153	371	G	A	58.1	chr10	40547649	A	9	0	2	54	8	0	2	51	8	0	2	51	9	0	2	54	4	0	2	39	5	0	2	42	Y	20	0.138	4
+Contig99_chr17_26021506_26022200	505	C	T	88.8	chr17	26022017	T	15	0	2	72	13	0	2	66	19	0	2	84	9	0	2	54	10	0	2	57	11	0	2	60	Y	1	0.172	1
+Contig27_chr17_61713766_61716585	1056	G	C	40.0	chr17	61714821	G	4	0	2	39	8	0	2	51	10	0	2	57	6	0	2	45	6	0	2	45	3	0	2	36	N	6	2.200	4
+Contig26_chr22_57817664_57819633	1453	A	G	150.0	chr22	57819121	G	9	0	2	54	9	0	2	54	13	0	2	66	15	0	2	72	11	0	2	60	14	0	2	69	N	15	0.471	1
+Contig103_chr25_38891221_38892140	407	G	A	131.0	chr25	38891644	G	8	0	2	51	14	0	2	69	18	0	2	81	8	0	2	51	8	0	2	51	11	0	2	60	Y	149	0.167	4
+Contig64_chr27_34654435_34654621	132	C	A	115.0	chr27	34654567	T	2	0	2	33	2	0	2	33	5	0	2	42	3	0	2	36	3	0	2	36	8	0	2	51	N	12	0.297	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/tool-data/gd.heterochromatic.loc.sample	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,2 @@
+# ref_species   heterochromatic_file
+#canFam2	/galaxy/local_data/genome_diversity/dpmix/canFam2_heterochrom.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/tool-data/gd.oscar.loc.sample	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,4 @@
+#<species>	<data_file>
+#hg19	/galaxy/local_data/genome_diversity/oscar/hsa_ENSEMBLcKEGGctpthw.tsv
+#bosTau4	/galaxy/local_data/genome_diversity/oscar/bta_ENSEMBLcKEGGctpthw.tsv
+#canFam2	/galaxy/local_data/genome_diversity/oscar/cfa_ENSEMBLcKEGGctpthw.tsv
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/tool-data/gd.pathways.txt.sample	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,721 @@
+hg19	hsa00010	hsa00010 - Glycolysis/ Gluconeogenesis
+hg19	hsa00020	hsa00020 - Citratecycle (TCA cycle)
+hg19	hsa00030	hsa00030 - Pentosephosphate pathway
+hg19	hsa00040	hsa00040 - Pentoseand glucuronate interconversions
+hg19	hsa00051	hsa00051 - Fructoseand mannose metabolism
+hg19	hsa00052	hsa00052 - Galactosemetabolism
+hg19	hsa00053	hsa00053 - Ascorbateand aldarate metabolism
+hg19	hsa00061	hsa00061 - Fattyacid biosynthesis
+hg19	hsa00062	hsa00062 - Fattyacid elongation
+hg19	hsa00071	hsa00071 - Fattyacid metabolism
+hg19	hsa00072	hsa00072 - Synthesisand degradation of ketone bodies
+hg19	hsa00100	hsa00100 - Steroidbiosynthesis
+hg19	hsa00120	hsa00120 - Primarybile acid biosynthesis
+hg19	hsa00130	hsa00130 - Ubiquinoneand other terpenoid-quinone biosynthesis
+hg19	hsa00140	hsa00140 - Steroidhormone biosynthesis
+hg19	hsa00190	hsa00190 - Oxidativephosphorylation
+hg19	hsa00230	hsa00230 - Purinemetabolism
+hg19	hsa00232	hsa00232 - Caffeinemetabolism
+hg19	hsa00240	hsa00240 - Pyrimidinemetabolism
+hg19	hsa00250	hsa00250 - Alanine,aspartate and glutamate metabolism
+hg19	hsa00260	hsa00260 - Glycine,serine and threonine metabolism
+hg19	hsa00270	hsa00270 - Cysteineand methionine metabolism
+hg19	hsa00280	hsa00280 - Valine,leucine and isoleucine degradation
+hg19	hsa00290	hsa00290 - Valine,leucine and isoleucine biosynthesis
+hg19	hsa00300	hsa00300 - Lysinebiosynthesis
+hg19	hsa00310	hsa00310 - Lysinedegradation
+hg19	hsa00330	hsa00330 - Arginineand proline metabolism
+hg19	hsa00340	hsa00340 - Histidinemetabolism
+hg19	hsa00350	hsa00350 - Tyrosinemetabolism
+hg19	hsa00360	hsa00360 - Phenylalaninemetabolism
+hg19	hsa00380	hsa00380 - Tryptophanmetabolism
+hg19	hsa00400	hsa00400 - Phenylalanine,tyrosine and tryptophan biosynthesis
+hg19	hsa00410	hsa00410 - beta-Alaninemetabolism
+hg19	hsa00430	hsa00430 - Taurineand hypotaurine metabolism
+hg19	hsa00450	hsa00450 - Selenocompoundmetabolism
+hg19	hsa00460	hsa00460 - Cyanoaminoacid metabolism
+hg19	hsa00480	hsa00480 - Glutathionemetabolism
+hg19	hsa00500	hsa00500 - Starchand sucrose metabolism
+hg19	hsa00510	hsa00510 - N-Glycanbiosynthesis
+hg19	hsa00511	hsa00511 - Otherglycan degradation
+hg19	hsa00512	hsa00512 - Mucintype O-Glycan biosynthesis
+hg19	hsa00514	hsa00514 - Othertypes of O-glycan biosynthesis
+hg19	hsa00520	hsa00520 - Aminosugar and nucleotide sugar metabolism
+hg19	hsa00524	hsa00524 - Butirosinand neomycin biosynthesis
+hg19	hsa00531	hsa00531 - Glycosaminoglycandegradation
+hg19	hsa00532	hsa00532 - Glycosaminoglycanbiosynthesis - chondroitin sulfate
+hg19	hsa00533	hsa00533 - Glycosaminoglycanbiosynthesis - keratan sulfate
+hg19	hsa00534	hsa00534 - Glycosaminoglycanbiosynthesis - heparan sulfate
+hg19	hsa00561	hsa00561 - Glycerolipidmetabolism
+hg19	hsa00562	hsa00562 - Inositolphosphate metabolism
+hg19	hsa00563	hsa00563 - Glycosylphosphatidylinositol(GPI)-anchorbiosynthesis
+hg19	hsa00564	hsa00564 - Glycerophospholipidmetabolism
+hg19	hsa00565	hsa00565 - Etherlipid metabolism
+hg19	hsa00590	hsa00590 - Arachidonicacid metabolism
+hg19	hsa00591	hsa00591 - Linoleicacid metabolism
+hg19	hsa00592	hsa00592 - alpha-Linolenicacid metabolism
+hg19	hsa00600	hsa00600 - Sphingolipidmetabolism
+hg19	hsa00601	hsa00601 - Glycosphingolipidbiosynthesis - lacto and neolacto series
+hg19	hsa00603	hsa00603 - Glycosphingolipidbiosynthesis - globo series
+hg19	hsa00604	hsa00604 - Glycosphingolipidbiosynthesis - ganglio series
+hg19	hsa00620	hsa00620 - Pyruvatemetabolism
+hg19	hsa00630	hsa00630 - Glyoxylateand dicarboxylate metabolism
+hg19	hsa00640	hsa00640 - Propanoatemetabolism
+hg19	hsa00650	hsa00650 - Butanoatemetabolism
+hg19	hsa00670	hsa00670 - Onecarbon pool by folate
+hg19	hsa00730	hsa00730 - Thiaminemetabolism
+hg19	hsa00740	hsa00740 - Riboflavinmetabolism
+hg19	hsa00750	hsa00750 - VitaminB6 metabolism
+hg19	hsa00760	hsa00760 - Nicotinateand nicotinamide metabolism
+hg19	hsa00770	hsa00770 - Pantothenateand CoA biosynthesis
+hg19	hsa00780	hsa00780 - Biotinmetabolism
+hg19	hsa00785	hsa00785 - Lipoicacid metabolism
+hg19	hsa00790	hsa00790 - Folatebiosynthesis
+hg19	hsa00830	hsa00830 - Retinolmetabolism
+hg19	hsa00860	hsa00860 - Porphyrinand chlorophyll metabolism
+hg19	hsa00900	hsa00900 - Terpenoidbackbone biosynthesis
+hg19	hsa00910	hsa00910 - Nitrogenmetabolism
+hg19	hsa00920	hsa00920 - Sulfurmetabolism
+hg19	hsa00970	hsa00970 - Aminoacyl-tRNAbiosynthesis
+hg19	hsa00980	hsa00980 - Metabolismof xenobiotics by cytochrome P450
+hg19	hsa00982	hsa00982 - Drugmetabolism - cytochrome P450
+hg19	hsa00983	hsa00983 - Drugmetabolism - other enzymes
+hg19	hsa01040	hsa01040 - Biosynthesisof unsaturated fatty acids
+hg19	hsa01100	hsa01100 - Metabolicpathways
+hg19	hsa02010	hsa02010 - ABCtransporters
+hg19	hsa03008	hsa03008 - Ribosomebiogenesis in eukaryotes
+hg19	hsa03010	hsa03010 - Ribosome
+hg19	hsa03013	hsa03013 - RNAtransport
+hg19	hsa03015	hsa03015 - mRNAsurveillance pathway
+hg19	hsa03018	hsa03018 - RNAdegradation
+hg19	hsa03020	hsa03020 - RNApolymerase
+hg19	hsa03022	hsa03022 - Basaltranscription factors
+hg19	hsa03030	hsa03030 - DNAreplication
+hg19	hsa03040	hsa03040 - Spliceosome
+hg19	hsa03050	hsa03050 - Proteasome
+hg19	hsa03060	hsa03060 - Proteinexport
+hg19	hsa03320	hsa03320 - PPARsignaling pathway
+hg19	hsa03410	hsa03410 - Baseexcision repair
+hg19	hsa03420	hsa03420 - Nucleotideexcision repair
+hg19	hsa03430	hsa03430 - Mismatchrepair
+hg19	hsa03440	hsa03440 - Homologousrecombination
+hg19	hsa03450	hsa03450 - Non-homologousend-joining
+hg19	hsa03460	hsa03460 - Fanconianemia pathway
+hg19	hsa04010	hsa04010 - MAPKsignaling pathway
+hg19	hsa04012	hsa04012 - ErbBsignaling pathway
+hg19	hsa04020	hsa04020 - Calciumsignaling pathway
+hg19	hsa04060	hsa04060 - Cytokine-cytokinereceptor interaction
+hg19	hsa04062	hsa04062 - Chemokinesignaling pathway
+hg19	hsa04070	hsa04070 - Phosphatidylinositolsignaling system
+hg19	hsa04080	hsa04080 - Neuroactiveligand-receptor interaction
+hg19	hsa04110	hsa04110 - Cellcycle
+hg19	hsa04114	hsa04114 - Oocytemeiosis
+hg19	hsa04115	hsa04115 - p53signaling pathway
+hg19	hsa04120	hsa04120 - Ubiquitinmediated proteolysis
+hg19	hsa04122	hsa04122 - Sulfurrelay system
+hg19	hsa04130	hsa04130 - SNAREinteractions in vesicular transport
+hg19	hsa04140	hsa04140 - Regulationof autophagy
+hg19	hsa04141	hsa04141 - Proteinprocessing in endoplasmic reticulum
+hg19	hsa04142	hsa04142 - Lysosome
+hg19	hsa04144	hsa04144 - Endocytosis
+hg19	hsa04145	hsa04145 - Phagosome
+hg19	hsa04146	hsa04146 - Peroxisome
+hg19	hsa04150	hsa04150 - mTORsignaling pathway
+hg19	hsa04210	hsa04210 - Apoptosis
+hg19	hsa04260	hsa04260 - Cardiacmuscle contraction
+hg19	hsa04270	hsa04270 - Vascularsmooth muscle contraction
+hg19	hsa04310	hsa04310 - Wntsignaling pathway
+hg19	hsa04320	hsa04320 - Dorso-ventralaxis formation
+hg19	hsa04330	hsa04330 - Notchsignaling pathway
+hg19	hsa04340	hsa04340 - Hedgehogsignaling pathway
+hg19	hsa04350	hsa04350 - TGF-betasignaling pathway
+hg19	hsa04360	hsa04360 - Axonguidance
+hg19	hsa04370	hsa04370 - VEGFsignaling pathway
+hg19	hsa04380	hsa04380 - Osteoclastdifferentiation
+hg19	hsa04510	hsa04510 - Focaladhesion
+hg19	hsa04512	hsa04512 - ECM-receptorinteraction
+hg19	hsa04514	hsa04514 - Celladhesion molecules (CAMs)
+hg19	hsa04520	hsa04520 - Adherensjunction
+hg19	hsa04530	hsa04530 - Tightjunction
+hg19	hsa04540	hsa04540 - Gapjunction
+hg19	hsa04610	hsa04610 - Complementand coagulation cascades
+hg19	hsa04612	hsa04612 - Antigenprocessing and presentation
+hg19	hsa04614	hsa04614 - Renin-angiotensinsystem
+hg19	hsa04620	hsa04620 - Toll-likereceptor signaling pathway
+hg19	hsa04621	hsa04621 - NOD-likereceptor signaling pathway
+hg19	hsa04622	hsa04622 - RIG-I-likereceptor signaling pathway
+hg19	hsa04623	hsa04623 - CytosolicDNA-sensing pathway
+hg19	hsa04630	hsa04630 - Jak-STATsignaling pathway
+hg19	hsa04640	hsa04640 - Hematopoieticcell lineage
+hg19	hsa04650	hsa04650 - Naturalkiller cell mediated cytotoxicity
+hg19	hsa04660	hsa04660 - Tcell receptor signaling pathway
+hg19	hsa04662	hsa04662 - Bcell receptor signaling pathway
+hg19	hsa04664	hsa04664 - Fcepsilon RI signaling pathway
+hg19	hsa04666	hsa04666 - Fcgamma R-mediated phagocytosis
+hg19	hsa04670	hsa04670 - Leukocytetransendothelial migration
+hg19	hsa04672	hsa04672 - Intestinalimmune network for IgA production
+hg19	hsa04710	hsa04710 - Circadianrhythm - mammal
+hg19	hsa04720	hsa04720 - Long-termpotentiation
+hg19	hsa04721	hsa04721 - Synapticvesicle cycle
+hg19	hsa04722	hsa04722 - Neurotrophinsignaling pathway
+hg19	hsa04724	hsa04724 - Glutamatergicsynapse
+hg19	hsa04725	hsa04725 - Cholinergicsynapse
+hg19	hsa04727	hsa04727 - GABAergicsynapse
+hg19	hsa04728	hsa04728 - Dopaminergicsynapse
+hg19	hsa04730	hsa04730 - Long-termdepression
+hg19	hsa04740	hsa04740 - Olfactorytransduction
+hg19	hsa04742	hsa04742 - Tastetransduction
+hg19	hsa04744	hsa04744 - Phototransduction
+hg19	hsa04810	hsa04810 - Regulationof actin cytoskeleton
+hg19	hsa04910	hsa04910 - Insulinsignaling pathway
+hg19	hsa04912	hsa04912 - GnRHsignaling pathway
+hg19	hsa04914	hsa04914 - Progesterone-mediatedoocyte maturation
+hg19	hsa04916	hsa04916 - Melanogenesis
+hg19	hsa04920	hsa04920 - Adipocytokinesignaling pathway
+hg19	hsa04930	hsa04930 - TypeII diabetes mellitus
+hg19	hsa04940	hsa04940 - TypeI diabetes mellitus
+hg19	hsa04950	hsa04950 - Maturityonset diabetes of the young
+hg19	hsa04960	hsa04960 - Aldosterone-regulatedsodium reabsorption
+hg19	hsa04961	hsa04961 - Endocrineand other factor-regulated calcium reabsorption
+hg19	hsa04962	hsa04962 - Vasopressin-regulatedwater reabsorption
+hg19	hsa04964	hsa04964 - Proximaltubule bicarbonate reclamation
+hg19	hsa04966	hsa04966 - Collectingduct acid secretion
+hg19	hsa04970	hsa04970 - Salivarysecretion
+hg19	hsa04971	hsa04971 - Gastricacid secretion
+hg19	hsa04972	hsa04972 - Pancreaticsecretion
+hg19	hsa04973	hsa04973 - Carbohydratedigestion and absorption
+hg19	hsa04974	hsa04974 - Proteindigestion and absorption
+hg19	hsa04975	hsa04975 - Fatdigestion and absorption
+hg19	hsa04976	hsa04976 - Bilesecretion
+hg19	hsa04977	hsa04977 - Vitamindigestion and absorption
+hg19	hsa04978	hsa04978 - Mineralabsorption
+hg19	hsa05010	hsa05010 - Alzheimer'sdisease
+hg19	hsa05012	hsa05012 - Parkinson'sdisease
+hg19	hsa05014	hsa05014 - Amyotrophiclateral sclerosis (ALS)
+hg19	hsa05016	hsa05016 - Huntington'sdisease
+hg19	hsa05020	hsa05020 - Priondiseases
+hg19	hsa05100	hsa05100 - Bacterialinvasion of epithelial cells
+hg19	hsa05132	hsa05132 - Salmonellainfection
+hg19	hsa05133	hsa05133 - Pertussis
+hg19	hsa05134	hsa05134 - Legionellosis
+hg19	hsa05140	hsa05140 - Leishmaniasis
+hg19	hsa05142	hsa05142 - Chagasdisease (American trypanosomiasis)
+hg19	hsa05143	hsa05143 - Africantrypanosomiasis
+hg19	hsa05144	hsa05144 - Malaria
+hg19	hsa05145	hsa05145 - Toxoplasmosis
+hg19	hsa05146	hsa05146 - Amoebiasis
+hg19	hsa05150	hsa05150 - Staphylococcusaureus infection
+hg19	hsa05152	hsa05152 - Tuberculosis
+hg19	hsa05160	hsa05160 - HepatitisC
+hg19	hsa05162	hsa05162 - Measles
+hg19	hsa05164	hsa05164 - InfluenzaA
+hg19	hsa05166	hsa05166 - HTLV-Iinfection
+hg19	hsa05168	hsa05168 - Herpessimplex infection
+hg19	hsa05200	hsa05200 - Pathwaysin cancer
+hg19	hsa05202	hsa05202 - Transcriptionalmisregulation in cancers
+hg19	hsa05210	hsa05210 - Colorectalcancer
+hg19	hsa05211	hsa05211 - Renalcell carcinoma
+hg19	hsa05212	hsa05212 - Pancreaticcancer
+hg19	hsa05213	hsa05213 - Endometrialcancer
+hg19	hsa05214	hsa05214 - Glioma
+hg19	hsa05215	hsa05215 - Prostatecancer
+hg19	hsa05216	hsa05216 - Thyroidcancer
+hg19	hsa05217	hsa05217 - Basalcell carcinoma
+hg19	hsa05218	hsa05218 - Melanoma
+hg19	hsa05219	hsa05219 - Bladdercancer
+hg19	hsa05220	hsa05220 - Chronicmyeloid leukemia
+hg19	hsa05221	hsa05221 - Acutemyeloid leukemia
+hg19	hsa05222	hsa05222 - Smallcell lung cancer
+hg19	hsa05223	hsa05223 - Non-smallcell lung cancer
+hg19	hsa05310	hsa05310 - Asthma
+hg19	hsa05320	hsa05320 - Autoimmunethyroid disease
+hg19	hsa05322	hsa05322 - Systemiclupus erythematosus
+hg19	hsa05323	hsa05323 - Rheumatoidarthritis
+hg19	hsa05330	hsa05330 - Allograftrejection
+hg19	hsa05332	hsa05332 - Graft-versus-hostdisease
+hg19	hsa05340	hsa05340 - Primaryimmunodeficiency
+hg19	hsa05410	hsa05410 - Hypertrophiccardiomyopathy (HCM)
+hg19	hsa05412	hsa05412 - Arrhythmogenicright ventricular cardiomyopathy (ARVC)
+hg19	hsa05414	hsa05414 - Dilatedcardiomyopathy
+hg19	hsa05416	hsa05416 - Viralmyocarditis
+canFam2	cfa00010	cfa00010 - Glycolysis/ Gluconeogenesis
+canFam2	cfa00020	cfa00020 - Citratecycle (TCA cycle)
+canFam2	cfa00030	cfa00030 - Pentosephosphate pathway
+canFam2	cfa00040	cfa00040 - Pentoseand glucuronate interconversions
+canFam2	cfa00051	cfa00051 - Fructoseand mannose metabolism
+canFam2	cfa00052	cfa00052 - Galactosemetabolism
+canFam2	cfa00053	cfa00053 - Ascorbateand aldarate metabolism
+canFam2	cfa00061	cfa00061 - Fattyacid biosynthesis
+canFam2	cfa00062	cfa00062 - Fattyacid elongation in mitochondria
+canFam2	cfa00071	cfa00071 - Fattyacid metabolism
+canFam2	cfa00072	cfa00072 - Synthesisand degradation of ketone bodies
+canFam2	cfa00100	cfa00100 - Steroidbiosynthesis
+canFam2	cfa00120	cfa00120 - Primarybile acid biosynthesis
+canFam2	cfa00130	cfa00130 - Ubiquinoneand other terpenoid-quinone biosynthesis
+canFam2	cfa00140	cfa00140 - Steroidhormone biosynthesis
+canFam2	cfa00190	cfa00190 - Oxidativephosphorylation
+canFam2	cfa00230	cfa00230 - Purinemetabolism
+canFam2	cfa00232	cfa00232 - Caffeinemetabolism
+canFam2	cfa00240	cfa00240 - Pyrimidinemetabolism
+canFam2	cfa00250	cfa00250 - Alanine,aspartate and glutamate metabolism
+canFam2	cfa00260	cfa00260 - Glycine,serine and threonine metabolism
+canFam2	cfa00270	cfa00270 - Cysteineand methionine metabolism
+canFam2	cfa00280	cfa00280 - Valine,leucine and isoleucine degradation
+canFam2	cfa00290	cfa00290 - Valine,leucine and isoleucine biosynthesis
+canFam2	cfa00300	cfa00300 - Lysinebiosynthesis
+canFam2	cfa00310	cfa00310 - Lysinedegradation
+canFam2	cfa00330	cfa00330 - Arginineand proline metabolism
+canFam2	cfa00340	cfa00340 - Histidinemetabolism
+canFam2	cfa00350	cfa00350 - Tyrosinemetabolism
+canFam2	cfa00360	cfa00360 - Phenylalaninemetabolism
+canFam2	cfa00380	cfa00380 - Tryptophanmetabolism
+canFam2	cfa00400	cfa00400 - Phenylalanine,tyrosine and tryptophan biosynthesis
+canFam2	cfa00410	cfa00410 - beta-Alaninemetabolism
+canFam2	cfa00430	cfa00430 - Taurineand hypotaurine metabolism
+canFam2	cfa00450	cfa00450 - Selenocompoundmetabolism
+canFam2	cfa00460	cfa00460 - Cyanoaminoacid metabolism
+canFam2	cfa00472	cfa00472 - D-Arginineand D-ornithine metabolism
+canFam2	cfa00480	cfa00480 - Glutathionemetabolism
+canFam2	cfa00500	cfa00500 - Starch and sucrose metabolism
+canFam2	cfa00510	cfa00510 - N-Glycan biosynthesis
+canFam2	cfa00511	cfa00511 - Other glycan degradation
+canFam2	cfa00512	cfa00512 - Mucin type O-Glycan biosynthesis
+canFam2	cfa00514	cfa00514 - Other types of O-glycan biosynthesis
+canFam2	cfa00520	cfa00520 - Amino sugar and nucleotide sugar metabolism
+canFam2	cfa00531	cfa00531 - Glycosaminoglycan degradation
+canFam2	cfa00532	cfa00532 - Glycosaminoglycan biosynthesis - chondroitin sulfate
+canFam2	cfa00533	cfa00533 - Glycosaminoglycan biosynthesis - keratan sulfate
+canFam2	cfa00534	cfa00534 - Glycosaminoglycan biosynthesis - heparan sulfate
+canFam2	cfa00561	cfa00561 - Glycerolipid metabolism
+canFam2	cfa00562	cfa00562 - Inositol phosphate metabolism
+canFam2	cfa00563	cfa00563 - Glycosylphosphatidylinositol(GPI)-anchor biosynthesis
+canFam2	cfa00564	cfa00564 - Glycerophospholipid metabolism
+canFam2	cfa00565	cfa00565 - Ether lipid metabolism
+canFam2	cfa00590	cfa00590 - Arachidonic acid metabolism
+canFam2	cfa00591	cfa00591 - Linoleic acid metabolism
+canFam2	cfa00592	cfa00592 - alpha-Linolenic acid metabolism
+canFam2	cfa00600	cfa00600 - Sphingolipid metabolism
+canFam2	cfa00601	cfa00601 - Glycosphingolipid biosynthesis - lacto and neolacto series
+canFam2	cfa00603	cfa00603 - Glycosphingolipid biosynthesis - globo series
+canFam2	cfa00604	cfa00604 - Glycosphingolipid biosynthesis - ganglio series
+canFam2	cfa00620	cfa00620 - Pyruvate metabolism
+canFam2	cfa00630	cfa00630 - Glyoxylate and dicarboxylate metabolism
+canFam2	cfa00640	cfa00640 - Propanoate metabolism
+canFam2	cfa00650	cfa00650 - Butanoate metabolism
+canFam2	cfa00670	cfa00670 - One carbon pool by folate
+canFam2	cfa00730	cfa00730 - Thiamine metabolism
+canFam2	cfa00740	cfa00740 - Riboflavin metabolism
+canFam2	cfa00750	cfa00750 - Vitamin B6 metabolism
+canFam2	cfa00760	cfa00760 - Nicotinate and nicotinamide metabolism
+canFam2	cfa00770	cfa00770 - Pantothenate and CoA biosynthesis
+canFam2	cfa00780	cfa00780 - Biotin metabolism
+canFam2	cfa00785	cfa00785 - Lipoic acid metabolism
+canFam2	cfa00790	cfa00790 - Folate biosynthesis
+canFam2	cfa00830	cfa00830 - Retinol metabolism
+canFam2	cfa00860	cfa00860 - Porphyrin and chlorophyll metabolism
+canFam2	cfa00900	cfa00900 - Terpenoid backbone biosynthesis
+canFam2	cfa00910	cfa00910 - Nitrogen metabolism
+canFam2	cfa00920	cfa00920 - Sulfur metabolism
+canFam2	cfa00970	cfa00970 - Aminoacyl-tRNA biosynthesis
+canFam2	cfa00980	cfa00980 - Metabolism of xenobiotics by cytochrome P450
+canFam2	cfa00982	cfa00982 - Drug metabolism - cytochrome P450
+canFam2	cfa00983	cfa00983 - Drug metabolism - other enzymes
+canFam2	cfa01040	cfa01040 - Biosynthesis of unsaturated fatty acids
+canFam2	cfa01100	cfa01100 - Metabolic pathways
+canFam2	cfa02010	cfa02010 - ABC transporters
+canFam2	cfa03008	cfa03008 - Ribosome biogenesis in eukaryotes
+canFam2	cfa03010	cfa03010 - Ribosome
+canFam2	cfa03013	cfa03013 - RNA transport
+canFam2	cfa03015	cfa03015 - mRNA surveillance pathway
+canFam2	cfa03018	cfa03018 - RNA degradation
+canFam2	cfa03020	cfa03020 - RNA polymerase
+canFam2	cfa03022	cfa03022 - Basal transcription factors
+canFam2	cfa03030	cfa03030 - DNA replication
+canFam2	cfa03040	cfa03040 - Spliceosome
+canFam2	cfa03050	cfa03050 - Proteasome
+canFam2	cfa03060	cfa03060 - Protein export
+canFam2	cfa03320	cfa03320 - PPAR signaling pathway
+canFam2	cfa03410	cfa03410 - Base excision repair
+canFam2	cfa03420	cfa03420 - Nucleotide excision repair
+canFam2	cfa03430	cfa03430 - Mismatch repair
+canFam2	cfa03440	cfa03440 - Homologous recombination
+canFam2	cfa03450	cfa03450 - Non-homologous end-joining
+canFam2	cfa03460	cfa03460 - Fanconi anemia pathway
+canFam2	cfa04010	cfa04010 - MAPK signaling pathway
+canFam2	cfa04012	cfa04012 - ErbB signaling pathway
+canFam2	cfa04020	cfa04020 - Calcium signaling pathway
+canFam2	cfa04060	cfa04060 - Cytokine-cytokine receptor interaction
+canFam2	cfa04062	cfa04062 - Chemokine signaling pathway
+canFam2	cfa04070	cfa04070 - Phosphatidylinositol signaling system
+canFam2	cfa04080	cfa04080 - Neuroactive ligand-receptor interaction
+canFam2	cfa04110	cfa04110 - Cell cycle
+canFam2	cfa04114	cfa04114 - Oocyte meiosis
+canFam2	cfa04115	cfa04115 - p53 signaling pathway
+canFam2	cfa04120	cfa04120 - Ubiquitin mediated proteolysis
+canFam2	cfa04122	cfa04122 - Sulfur relay system
+canFam2	cfa04130	cfa04130 - SNARE interactions in vesicular transport
+canFam2	cfa04140	cfa04140 - Regulation of autophagy
+canFam2	cfa04141	cfa04141 - Protein processing in endoplasmic reticulum
+canFam2	cfa04142	cfa04142 - Lysosome
+canFam2	cfa04144	cfa04144 - Endocytosis
+canFam2	cfa04145	cfa04145 - Phagosome
+canFam2	cfa04146	cfa04146 - Peroxisome
+canFam2	cfa04150	cfa04150 - mTOR signaling pathway
+canFam2	cfa04210	cfa04210 - Apoptosis
+canFam2	cfa04260	cfa04260 - Cardiac muscle contraction
+canFam2	cfa04270	cfa04270 - Vascular smooth muscle contraction
+canFam2	cfa04310	cfa04310 - Wnt signaling pathway
+canFam2	cfa04320	cfa04320 - Dorso-ventral axis formation
+canFam2	cfa04330	cfa04330 - Notch signaling pathway
+canFam2	cfa04340	cfa04340 - Hedgehog signaling pathway
+canFam2	cfa04350	cfa04350 - TGF-beta signaling pathway
+canFam2	cfa04360	cfa04360 - Axon guidance
+canFam2	cfa04370	cfa04370 - VEGF signaling pathway
+canFam2	cfa04380	cfa04380 - Osteoclast differentiation
+canFam2	cfa04510	cfa04510 - Focal adhesion
+canFam2	cfa04512	cfa04512 - ECM-receptor interaction
+canFam2	cfa04514	cfa04514 - Cell adhesion molecules (CAMs)
+canFam2	cfa04520	cfa04520 - Adherens junction
+canFam2	cfa04530	cfa04530 - Tight junction
+canFam2	cfa04540	cfa04540 - Gap junction
+canFam2	cfa04610	cfa04610 - Complement and coagulation cascades
+canFam2	cfa04612	cfa04612 - Antigen processing and presentation
+canFam2	cfa04614	cfa04614 - Renin-angiotensin system
+canFam2	cfa04620	cfa04620 - Toll-like receptor signaling pathway
+canFam2	cfa04621	cfa04621 - NOD-like receptor signaling pathway
+canFam2	cfa04622	cfa04622 - RIG-I-like receptor signaling pathway
+canFam2	cfa04623	cfa04623 - Cytosolic DNA-sensing pathway
+canFam2	cfa04630	cfa04630 - Jak-STAT signaling pathway
+canFam2	cfa04640	cfa04640 - Hematopoietic cell lineage
+canFam2	cfa04650	cfa04650 - Natural killer cell mediated cytotoxicity
+canFam2	cfa04660	cfa04660 - T cell receptor signaling pathway
+canFam2	cfa04662	cfa04662 - B cell receptor signaling pathway
+canFam2	cfa04664	cfa04664 - Fc epsilon RI signaling pathway
+canFam2	cfa04666	cfa04666 - Fc gamma R-mediated phagocytosis
+canFam2	cfa04670	cfa04670 - Leukocyte transendothelial migration
+canFam2	cfa04672	cfa04672 - Intestinal immune network for IgA production
+canFam2	cfa04710	cfa04710 - Circadian rhythm - mammal
+canFam2	cfa04720	cfa04720 - Long-term potentiation
+canFam2	cfa04721	cfa04721 - Synaptic vesicle cycle
+canFam2	cfa04722	cfa04722 - Neurotrophin signaling pathway
+canFam2	cfa04724	cfa04724 - Glutamatergic synapse
+canFam2	cfa04725	cfa04725 - Cholinergic synapse
+canFam2	cfa04727	cfa04727 - GABAergic synapse
+canFam2	cfa04728	cfa04728 - Dopaminergic synapse
+canFam2	cfa04730	cfa04730 - Long-term depression
+canFam2	cfa04740	cfa04740 - Olfactory transduction
+canFam2	cfa04742	cfa04742 - Taste transduction
+canFam2	cfa04744	cfa04744 - Phototransduction
+canFam2	cfa04810	cfa04810 - Regulation of actin cytoskeleton
+canFam2	cfa04910	cfa04910 - Insulin signaling pathway
+canFam2	cfa04912	cfa04912 - GnRH signaling pathway
+canFam2	cfa04914	cfa04914 - Progesterone-mediated oocyte maturation
+canFam2	cfa04916	cfa04916 - Melanogenesis
+canFam2	cfa04920	cfa04920 - Adipocytokine signaling pathway
+canFam2	cfa04930	cfa04930 - Type II diabetes mellitus
+canFam2	cfa04940	cfa04940 - Type I diabetes mellitus
+canFam2	cfa04950	cfa04950 - Maturity onset diabetes of the young
+canFam2	cfa04960	cfa04960 - Aldosterone-regulated sodium reabsorption
+canFam2	cfa04961	cfa04961 - Endocrine and other factor-regulated calcium reabsorption
+canFam2	cfa04962	cfa04962 - Vasopressin-regulated water reabsorption
+canFam2	cfa04964	cfa04964 - Proximal tubule bicarbonate reclamation
+canFam2	cfa04966	cfa04966 - Collecting duct acid secretion
+canFam2	cfa04970	cfa04970 - Salivary secretion
+canFam2	cfa04971	cfa04971 - Gastric acid secretion
+canFam2	cfa04972	cfa04972 - Pancreatic secretion
+canFam2	cfa04973	cfa04973 - Carbohydrate digestion and absorption
+canFam2	cfa04974	cfa04974 - Protein digestion and absorption
+canFam2	cfa04975	cfa04975 - Fat digestion and absorption
+canFam2	cfa04976	cfa04976 - Bile secretion
+canFam2	cfa04977	cfa04977 - Vitamin digestion and absorption
+canFam2	cfa04978	cfa04978 - Mineral absorption
+canFam2	cfa05010	cfa05010 - Alzheimer's disease
+canFam2	cfa05012	cfa05012 - Parkinson's disease
+canFam2	cfa05014	cfa05014 - Amyotrophic lateral sclerosis (ALS)
+canFam2	cfa05016	cfa05016 - Huntington's disease
+canFam2	cfa05020	cfa05020 - Prion diseases
+canFam2	cfa05100	cfa05100 - Bacterial invasion of epithelial cells
+canFam2	cfa05132	cfa05132 - Salmonella infection
+canFam2	cfa05133	cfa05133 - Pertussis
+canFam2	cfa05134	cfa05134 - Legionellosis
+canFam2	cfa05140	cfa05140 - Leishmaniasis
+canFam2	cfa05142	cfa05142 - Chagas disease (American trypanosomiasis)
+canFam2	cfa05143	cfa05143 - African trypanosomiasis
+canFam2	cfa05144	cfa05144 - Malaria
+canFam2	cfa05145	cfa05145 - Toxoplasmosis
+canFam2	cfa05146	cfa05146 - Amoebiasis
+canFam2	cfa05150	cfa05150 - Staphylococcus aureus infection
+canFam2	cfa05152	cfa05152 - Tuberculosis
+canFam2	cfa05160	cfa05160 - Hepatitis C
+canFam2	cfa05162	cfa05162 - Measles
+canFam2	cfa05164	cfa05164 - Influenza A
+canFam2	cfa05166	cfa05166 - HTLV-I infection
+canFam2	cfa05168	cfa05168 - Herpes simplex infection
+canFam2	cfa05200	cfa05200 - Pathways in cancer
+canFam2	cfa05210	cfa05210 - Colorectal cancer
+canFam2	cfa05211	cfa05211 - Renal cell carcinoma
+canFam2	cfa05212	cfa05212 - Pancreatic cancer
+canFam2	cfa05213	cfa05213 - Endometrial cancer
+canFam2	cfa05214	cfa05214 - Glioma
+canFam2	cfa05215	cfa05215 - Prostate cancer
+canFam2	cfa05216	cfa05216 - Thyroid cancer
+canFam2	cfa05217	cfa05217 - Basal cell carcinoma
+canFam2	cfa05218	cfa05218 - Melanoma
+canFam2	cfa05219	cfa05219 - Bladder cancer
+canFam2	cfa05220	cfa05220 - Chronic myeloid leukemia
+canFam2	cfa05221	cfa05221 - Acute myeloid leukemia
+canFam2	cfa05222	cfa05222 - Small cell lung cancer
+canFam2	cfa05223	cfa05223 - Non-small cell lung cancer
+canFam2	cfa05310	cfa05310 - Asthma
+canFam2	cfa05320	cfa05320 - Autoimmune thyroid disease
+canFam2	cfa05322	cfa05322 - Systemic lupus erythematosus
+canFam2	cfa05323	cfa05323 - Rheumatoid arthritis
+canFam2	cfa05330	cfa05330 - Allograft rejection
+canFam2	cfa05332	cfa05332 - Graft-versus-host disease
+canFam2	cfa05340	cfa05340 - Primary immunodeficiency
+canFam2	cfa05410	cfa05410 - Hypertrophic cardiomyopathy (HCM)
+canFam2	cfa05412	cfa05412 - Arrhythmogenic right ventricular cardiomyopathy (ARVC)
+canFam2	cfa05414	cfa05414 - Dilated cardiomyopathy
+canFam2	cfa05416	cfa05416 - Viral myocarditis
+bosTau4	bta00010	bta00010 - Glycolysis / Gluconeogenesis
+bosTau4	bta00020	bta00020 - Citrate cycle (TCA cycle)
+bosTau4	bta00030	bta00030 - Pentose phosphate pathway
+bosTau4	bta00040	bta00040 - Pentose and glucuronate interconversions
+bosTau4	bta00051	bta00051 - Fructose and mannose metabolism
+bosTau4	bta00052	bta00052 - Galactose metabolism
+bosTau4	bta00053	bta00053 - Ascorbate and aldarate metabolism
+bosTau4	bta00061	bta00061 - Fatty acid biosynthesis
+bosTau4	bta00062	bta00062 - Fatty acid elongation
+bosTau4	bta00071	bta00071 - Fatty acid metabolism
+bosTau4	bta00072	bta00072 - Synthesis and degradation of ketone bodies
+bosTau4	bta00100	bta00100 - Steroid biosynthesis
+bosTau4	bta00120	bta00120 - Primary bile acid biosynthesis
+bosTau4	bta00130	bta00130 - Ubiquinone and other terpenoid-quinone biosynthesis
+bosTau4	bta00140	bta00140 - Steroid hormone biosynthesis
+bosTau4	bta00190	bta00190 - Oxidative phosphorylation
+bosTau4	bta00230	bta00230 - Purine metabolism
+bosTau4	bta00232	bta00232 - Caffeine metabolism
+bosTau4	bta00240	bta00240 - Pyrimidine metabolism
+bosTau4	bta00250	bta00250 - Alanine, aspartate and glutamate metabolism
+bosTau4	bta00260	bta00260 - Glycine, serine and threonine metabolism
+bosTau4	bta00270	bta00270 - Cysteine and methionine metabolism
+bosTau4	bta00280	bta00280 - Valine, leucine and isoleucine degradation
+bosTau4	bta00290	bta00290 - Valine, leucine and isoleucine biosynthesis
+bosTau4	bta00300	bta00300 - Lysine biosynthesis
+bosTau4	bta00310	bta00310 - Lysine degradation
+bosTau4	bta00330	bta00330 - Arginine and proline metabolism
+bosTau4	bta00340	bta00340 - Histidine metabolism
+bosTau4	bta00350	bta00350 - Tyrosine metabolism
+bosTau4	bta00360	bta00360 - Phenylalanine metabolism
+bosTau4	bta00380	bta00380 - Tryptophan metabolism
+bosTau4	bta00400	bta00400 - Phenylalanine, tyrosine and tryptophan biosynthesis
+bosTau4	bta00410	bta00410 - beta-Alanine metabolism
+bosTau4	bta00430	bta00430 - Taurine and hypotaurine metabolism
+bosTau4	bta00450	bta00450 - Selenocompound metabolism
+bosTau4	bta00460	bta00460 - Cyanoamino acid metabolism
+bosTau4	bta00471	bta00471 - D-Glutamine and D-glutamate metabolism
+bosTau4	bta00472	bta00472 - D-Arginine and D-ornithine metabolism
+bosTau4	bta00480	bta00480 - Glutathione metabolism
+bosTau4	bta00500	bta00500 - Starch and sucrose metabolism
+bosTau4	bta00510	bta00510 - N-Glycan biosynthesis
+bosTau4	bta00511	bta00511 - Other glycan degradation
+bosTau4	bta00512	bta00512 - Mucin type O-Glycan biosynthesis
+bosTau4	bta00514	bta00514 - Other types of O-glycan biosynthesis
+bosTau4	bta00520	bta00520 - Amino sugar and nucleotide sugar metabolism
+bosTau4	bta00524	bta00524 - Butirosin and neomycin biosynthesis
+bosTau4	bta00531	bta00531 - Glycosaminoglycan degradation
+bosTau4	bta00532	bta00532 - Glycosaminoglycan biosynthesis - chondroitin sulfate
+bosTau4	bta00533	bta00533 - Glycosaminoglycan biosynthesis - keratan sulfate
+bosTau4	bta00534	bta00534 - Glycosaminoglycan biosynthesis - heparan sulfate
+bosTau4	bta00561	bta00561 - Glycerolipid metabolism
+bosTau4	bta00562	bta00562 - Inositol phosphate metabolism
+bosTau4	bta00563	bta00563 - Glycosylphosphatidylinositol(GPI)-anchor biosynthesis
+bosTau4	bta00564	bta00564 - Glycerophospholipid metabolism
+bosTau4	bta00565	bta00565 - Ether lipid metabolism
+bosTau4	bta00590	bta00590 - Arachidonic acid metabolism
+bosTau4	bta00591	bta00591 - Linoleic acid metabolism
+bosTau4	bta00592	bta00592 - alpha-Linolenic acid metabolism
+bosTau4	bta00600	bta00600 - Sphingolipid metabolism
+bosTau4	bta00601	bta00601 - Glycosphingolipid biosynthesis - lacto and neolacto series
+bosTau4	bta00603	bta00603 - Glycosphingolipid biosynthesis - globo series
+bosTau4	bta00604	bta00604 - Glycosphingolipid biosynthesis - ganglio series
+bosTau4	bta00620	bta00620 - Pyruvate metabolism
+bosTau4	bta00630	bta00630 - Glyoxylate and dicarboxylate metabolism
+bosTau4	bta00640	bta00640 - Propanoate metabolism
+bosTau4	bta00650	bta00650 - Butanoate metabolism
+bosTau4	bta00670	bta00670 - One carbon pool by folate
+bosTau4	bta00730	bta00730 - Thiamine metabolism
+bosTau4	bta00740	bta00740 - Riboflavin metabolism
+bosTau4	bta00750	bta00750 - Vitamin B6 metabolism
+bosTau4	bta00760	bta00760 - Nicotinate and nicotinamide metabolism
+bosTau4	bta00770	bta00770 - Pantothenate and CoA biosynthesis
+bosTau4	bta00780	bta00780 - Biotin metabolism
+bosTau4	bta00785	bta00785 - Lipoic acid metabolism
+bosTau4	bta00790	bta00790 - Folate biosynthesis
+bosTau4	bta00830	bta00830 - Retinol metabolism
+bosTau4	bta00860	bta00860 - Porphyrin and chlorophyll metabolism
+bosTau4	bta00900	bta00900 - Terpenoid backbone biosynthesis
+bosTau4	bta00910	bta00910 - Nitrogen metabolism
+bosTau4	bta00920	bta00920 - Sulfur metabolism
+bosTau4	bta00970	bta00970 - Aminoacyl-tRNA biosynthesis
+bosTau4	bta00980	bta00980 - Metabolism of xenobiotics by cytochrome P450
+bosTau4	bta00982	bta00982 - Drug metabolism - cytochrome P450
+bosTau4	bta00983	bta00983 - Drug metabolism - other enzymes
+bosTau4	bta01040	bta01040 - Biosynthesis of unsaturated fatty acids
+bosTau4	bta01100	bta01100 - Metabolic pathways
+bosTau4	bta02010	bta02010 - ABC transporters
+bosTau4	bta03008	bta03008 - Ribosome biogenesis in eukaryotes
+bosTau4	bta03010	bta03010 - Ribosome
+bosTau4	bta03013	bta03013 - RNA transport
+bosTau4	bta03015	bta03015 - mRNA surveillance pathway
+bosTau4	bta03018	bta03018 - RNA degradation
+bosTau4	bta03020	bta03020 - RNA polymerase
+bosTau4	bta03022	bta03022 - Basal transcription factors
+bosTau4	bta03030	bta03030 - DNA replication
+bosTau4	bta03040	bta03040 - Spliceosome
+bosTau4	bta03050	bta03050 - Proteasome
+bosTau4	bta03060	bta03060 - Protein export
+bosTau4	bta03320	bta03320 - PPAR signaling pathway
+bosTau4	bta03410	bta03410 - Base excision repair
+bosTau4	bta03420	bta03420 - Nucleotide excision repair
+bosTau4	bta03430	bta03430 - Mismatch repair
+bosTau4	bta03440	bta03440 - Homologous recombination
+bosTau4	bta03450	bta03450 - Non-homologous end-joining
+bosTau4	bta03460	bta03460 - Fanconi anemia pathway
+bosTau4	bta04010	bta04010 - MAPK signaling pathway
+bosTau4	bta04012	bta04012 - ErbB signaling pathway
+bosTau4	bta04020	bta04020 - Calcium signaling pathway
+bosTau4	bta04060	bta04060 - Cytokine-cytokine receptor interaction
+bosTau4	bta04062	bta04062 - Chemokine signaling pathway
+bosTau4	bta04070	bta04070 - Phosphatidylinositol signaling system
+bosTau4	bta04080	bta04080 - Neuroactive ligand-receptor interaction
+bosTau4	bta04110	bta04110 - Cell cycle
+bosTau4	bta04114	bta04114 - Oocyte meiosis
+bosTau4	bta04115	bta04115 - p53 signaling pathway
+bosTau4	bta04120	bta04120 - Ubiquitin mediated proteolysis
+bosTau4	bta04122	bta04122 - Sulfur relay system
+bosTau4	bta04130	bta04130 - SNARE interactions in vesicular transport
+bosTau4	bta04140	bta04140 - Regulation of autophagy
+bosTau4	bta04141	bta04141 - Protein processing in endoplasmic reticulum
+bosTau4	bta04142	bta04142 - Lysosome
+bosTau4	bta04144	bta04144 - Endocytosis
+bosTau4	bta04145	bta04145 - Phagosome
+bosTau4	bta04146	bta04146 - Peroxisome
+bosTau4	bta04150	bta04150 - mTOR signaling pathway
+bosTau4	bta04210	bta04210 - Apoptosis
+bosTau4	bta04260	bta04260 - Cardiac muscle contraction
+bosTau4	bta04270	bta04270 - Vascular smooth muscle contraction
+bosTau4	bta04310	bta04310 - Wnt signaling pathway
+bosTau4	bta04320	bta04320 - Dorso-ventral axis formation
+bosTau4	bta04330	bta04330 - Notch signaling pathway
+bosTau4	bta04340	bta04340 - Hedgehog signaling pathway
+bosTau4	bta04350	bta04350 - TGF-beta signaling pathway
+bosTau4	bta04360	bta04360 - Axon guidance
+bosTau4	bta04370	bta04370 - VEGF signaling pathway
+bosTau4	bta04380	bta04380 - Osteoclast differentiation
+bosTau4	bta04510	bta04510 - Focal adhesion
+bosTau4	bta04512	bta04512 - ECM-receptor interaction
+bosTau4	bta04514	bta04514 - Cell adhesion molecules (CAMs)
+bosTau4	bta04520	bta04520 - Adherens junction
+bosTau4	bta04530	bta04530 - Tight junction
+bosTau4	bta04540	bta04540 - Gap junction
+bosTau4	bta04610	bta04610 - Complement and coagulation cascades
+bosTau4	bta04612	bta04612 - Antigen processing and presentation
+bosTau4	bta04614	bta04614 - Renin-angiotensin system
+bosTau4	bta04620	bta04620 - Toll-like receptor signaling pathway
+bosTau4	bta04621	bta04621 - NOD-like receptor signaling pathway
+bosTau4	bta04622	bta04622 - RIG-I-like receptor signaling pathway
+bosTau4	bta04623	bta04623 - Cytosolic DNA-sensing pathway
+bosTau4	bta04630	bta04630 - Jak-STAT signaling pathway
+bosTau4	bta04640	bta04640 - Hematopoietic cell lineage
+bosTau4	bta04650	bta04650 - Natural killer cell mediated cytotoxicity
+bosTau4	bta04660	bta04660 - T cell receptor signaling pathway
+bosTau4	bta04662	bta04662 - B cell receptor signaling pathway
+bosTau4	bta04664	bta04664 - Fc epsilon RI signaling pathway
+bosTau4	bta04666	bta04666 - Fc gamma R-mediated phagocytosis
+bosTau4	bta04670	bta04670 - Leukocyte transendothelial migration
+bosTau4	bta04672	bta04672 - Intestinal immune network for IgA production
+bosTau4	bta04710	bta04710 - Circadian rhythm - mammal
+bosTau4	bta04720	bta04720 - Long-term potentiation
+bosTau4	bta04721	bta04721 - Synaptic vesicle cycle
+bosTau4	bta04722	bta04722 - Neurotrophin signaling pathway
+bosTau4	bta04724	bta04724 - Glutamatergic synapse
+bosTau4	bta04725	bta04725 - Cholinergic synapse
+bosTau4	bta04727	bta04727 - GABAergic synapse
+bosTau4	bta04728	bta04728 - Dopaminergic synapse
+bosTau4	bta04730	bta04730 - Long-term depression
+bosTau4	bta04740	bta04740 - Olfactory transduction
+bosTau4	bta04742	bta04742 - Taste transduction
+bosTau4	bta04744	bta04744 - Phototransduction
+bosTau4	bta04810	bta04810 - Regulation of actin cytoskeleton
+bosTau4	bta04910	bta04910 - Insulin signaling pathway
+bosTau4	bta04912	bta04912 - GnRH signaling pathway
+bosTau4	bta04914	bta04914 - Progesterone-mediated oocyte maturation
+bosTau4	bta04916	bta04916 - Melanogenesis
+bosTau4	bta04920	bta04920 - Adipocytokine signaling pathway
+bosTau4	bta04930	bta04930 - Type II diabetes mellitus
+bosTau4	bta04940	bta04940 - Type I diabetes mellitus
+bosTau4	bta04950	bta04950 - Maturity onset diabetes of the young
+bosTau4	bta04960	bta04960 - Aldosterone-regulated sodium reabsorption
+bosTau4	bta04961	bta04961 - Endocrine and other factor-regulated calcium reabsorption
+bosTau4	bta04962	bta04962 - Vasopressin-regulated water reabsorption
+bosTau4	bta04964	bta04964 - Proximal tubule bicarbonate reclamation
+bosTau4	bta04966	bta04966 - Collecting duct acid secretion
+bosTau4	bta04970	bta04970 - Salivary secretion
+bosTau4	bta04971	bta04971 - Gastric acid secretion
+bosTau4	bta04972	bta04972 - Pancreatic secretion
+bosTau4	bta04973	bta04973 - Carbohydrate digestion and absorption
+bosTau4	bta04974	bta04974 - Protein digestion and absorption
+bosTau4	bta04975	bta04975 - Fat digestion and absorption
+bosTau4	bta04976	bta04976 - Bile secretion
+bosTau4	bta04977	bta04977 - Vitamin digestion and absorption
+bosTau4	bta04978	bta04978 - Mineral absorption
+bosTau4	bta05010	bta05010 - Alzheimer's disease
+bosTau4	bta05012	bta05012 - Parkinson's disease
+bosTau4	bta05014	bta05014 - Amyotrophic lateral sclerosis (ALS)
+bosTau4	bta05016	bta05016 - Huntington's disease
+bosTau4	bta05020	bta05020 - Prion diseases
+bosTau4	bta05100	bta05100 - Bacterial invasion of epithelial cells
+bosTau4	bta05132	bta05132 - Salmonella infection
+bosTau4	bta05133	bta05133 - Pertussis
+bosTau4	bta05134	bta05134 - Legionellosis
+bosTau4	bta05140	bta05140 - Leishmaniasis
+bosTau4	bta05142	bta05142 - Chagas disease (American trypanosomiasis)
+bosTau4	bta05143	bta05143 - African trypanosomiasis
+bosTau4	bta05144	bta05144 - Malaria
+bosTau4	bta05145	bta05145 - Toxoplasmosis
+bosTau4	bta05146	bta05146 - Amoebiasis
+bosTau4	bta05150	bta05150 - Staphylococcus aureus infection
+bosTau4	bta05152	bta05152 - Tuberculosis
+bosTau4	bta05160	bta05160 - Hepatitis C
+bosTau4	bta05162	bta05162 - Measles
+bosTau4	bta05164	bta05164 - Influenza A
+bosTau4	bta05166	bta05166 - HTLV-I infection
+bosTau4	bta05168	bta05168 - Herpes simplex infection
+bosTau4	bta05200	bta05200 - Pathways in cancer
+bosTau4	bta05202	bta05202 - Transcriptional misregulation in cancers
+bosTau4	bta05210	bta05210 - Colorectal cancer
+bosTau4	bta05211	bta05211 - Renal cell carcinoma
+bosTau4	bta05212	bta05212 - Pancreatic cancer
+bosTau4	bta05213	bta05213 - Endometrial cancer
+bosTau4	bta05214	bta05214 - Glioma
+bosTau4	bta05215	bta05215 - Prostate cancer
+bosTau4	bta05216	bta05216 - Thyroid cancer
+bosTau4	bta05217	bta05217 - Basal cell carcinoma
+bosTau4	bta05218	bta05218 - Melanoma
+bosTau4	bta05219	bta05219 - Bladder cancer
+bosTau4	bta05220	bta05220 - Chronic myeloid leukemia
+bosTau4	bta05221	bta05221 - Acute myeloid leukemia
+bosTau4	bta05222	bta05222 - Small cell lung cancer
+bosTau4	bta05223	bta05223 - Non-small cell lung cancer
+bosTau4	bta05310	bta05310 - Asthma
+bosTau4	bta05320	bta05320 - Autoimmune thyroid disease
+bosTau4	bta05322	bta05322 - Systemic lupus erythematosus
+bosTau4	bta05323	bta05323 - Rheumatoid arthritis
+bosTau4	bta05330	bta05330 - Allograft rejection
+bosTau4	bta05332	bta05332 - Graft-versus-host disease
+bosTau4	bta05340	bta05340 - Primary immunodeficiency
+bosTau4	bta05410	bta05410 - Hypertrophic cardiomyopathy (HCM)
+bosTau4	bta05412	bta05412 - Arrhythmogenic right ventricular cardiomyopathy (ARVC)
+bosTau4	bta05414	bta05414 - Dilated cardiomyopathy
+bosTau4	bta05416	bta05416 - Viral myocarditis
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/tool-data/gd.primers.loc.sample	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,6 @@
+#<species>  <primers_file_path>
+#aye-aye	/galaxy/local_data/genome_diversity/primers/aye-aye_Galaxy_primers.txt
+#bear	/galaxy/local_data/genome_diversity/primers/bear_Galaxy_primers.txt
+#bighorn	/galaxy/local_data/genome_diversity/primers/bighorn_Galaxy_primers.txt
+#tasmanian_devil	/galaxy/local_data/genome_diversity/primers/devil_Galaxy_primers.txt
+#tick	/galaxy/local_data/genome_diversity/primers/tick_Galaxy_primers.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/tool-data/gd.rank.loc.sample	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,4 @@
+#<species> <prefix> <kxml_dir_path> <path_to_dict_file>
+#hg19	hsa	/galaxy/local_data/genome_diversity/rank/KXML_hsa.d	/galaxy/local_data/genome_diversity/rank/hsa_dict.txt
+#canFam2	cfa	/galaxy/local_data/genome_diversity/rank/KXML_cfa.d	/galaxy/local_data/genome_diversity/rank/cfa_dict.txt
+#bosTau4	bta	/galaxy/local_data/genome_diversity/rank/KXML_bta.d	/galaxy/local_data/genome_diversity/rank/bta_dict.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/tool-data/gd.ref_species.txt.sample	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,4 @@
+# genome diversity species
+cow	cow
+hg19	hg19
+dog	dog
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/tool-data/gd.restriction_enzymes.txt.sample	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,99 @@
+Acc65I - GGTACC	Acc65I
+AccB7I - CCANNNNNTGG	AccB7I
+AccI - GT(A/C)(G/T)AC	AccI
+AccIII - TCCGGA	AccIII
+AcyI - G(A/G)CG(C/T)C	AcyI
+AgeI - ACCGGT	AgeI
+AluI - AGCT	AluI
+Alw44I - GTGCAC	Alw44I
+ApaI - GGGCCC	ApaI
+AvaI - C(C/T)CG(A/G)G	AvaI
+AvaII - GG(A/T)CC	AvaII
+BalI - TGGCCA	BalI
+BamHI - GGATCC	BamHI
+BanI - GG(C/T)(A/G)CC	BanI
+BanII - G(A/G)GC(C/T)C	BanII
+BbuI - GCATGC	BbuI
+BclI - TGATCA	BclI
+BglI - GCCNNNNNGGC	BglI
+BglII - AGATCT	BglII
+BsaMI - GAATGC	BsaMI
+BsaOI - CG(A/G)(C/T)CG	BsaOI
+Bsp1286I - G(A/G/T)GC(A/C/T)C	Bsp1286I
+BsrBRI - GATNNNNATC	BsrBRI
+BsrSI - ACTGG	BsrSI
+BssHII - GCGCGC	BssHII
+Bst98I - CTTAAG	Bst98I
+BstEII - GGTNACC	BstEII
+BstOI - CC(A/T)GG	BstOI
+BstXI - CCANNNNNNTGG	BstXI
+BstZI - CGGCCG	BstZI
+Bsu36I - CCTNAGG	Bsu36I
+CfoI - GCGC	CfoI
+ClaI - ATCGAT	ClaI
+Csp45I - TTCGAA	Csp45I
+CspI - CGG(A/T)CCG	CspI
+DdeI - CTNAG	DdeI
+DpnI - GATC	DpnI
+DraI - TTTAAA	DraI
+EclHKI - GACNNNNNGTC	EclHKI
+Eco47III - AGCGCT	Eco47III
+Eco52I - CGGCCG	Eco52I
+Eco72I - CACGTG	Eco72I
+EcoRI - GAATTC	EcoRI
+EcoRV - GATATC	EcoRV
+HaeII - (A/G)GCGC(C/T)	HaeII
+HaeIII - GGCC	HaeIII
+HhaI - GCGC	HhaI
+HincII - GT(C/T)(A/G)AC	HincII
+HindIII - AAGCTT	HindIII
+HinfI - GANTC	HinfI
+HpaI - GTTAAC	HpaI
+HpaII - CCGG	HpaII
+Hsp92I - G(A/G)CG(C/T)C	Hsp92I
+Hsp92II - CATG	Hsp92II
+I-PpoI - TAACTATGACTCTCTTAAGGTAGCCAAAT	I-PpoI
+KpnI - GGTACC	KpnI
+MboI - GATC	MboI
+MluI - ACGCGT	MluI
+MspA1I - C(A/C)GC(G/T)G	MspA1I
+MspI - CCGG	MspI
+NaeI - GCCGGC	NaeI
+NarI - GGCGCC	NarI
+NciI - CC(C/G)GG	NciI
+NcoI - CCATGG	NcoI
+NdeI - CATATG	NdeI
+NgoMIV - GCCGGC	NgoMIV
+NheI - GCTAGC	NheI
+NotI - GCGGCCGC	NotI
+NruI - TCGCGA	NruI
+NsiI - ATGCAT	NsiI
+PstI - CTGCAG	PstI
+PvuI - CGATCG	PvuI
+PvuII - CAGCTG	PvuII
+RsaI - GTAC	RsaI
+SacI - GAGCTC	SacI
+SacII - CCGCGG	SacII
+SalI - GTCGAC	SalI
+Sau3AI - GATC	Sau3AI
+Sau96I - GGNCC	Sau96I
+ScaI - AGTACT	ScaI
+SfiI - GGCCNNNNNGGCC	SfiI
+SgfI - GCGATCGC	SgfI
+SinI - GG(A/T)CC	SinI
+SmaI - CCCGGG	SmaI
+SnaBI - TACGTA	SnaBI
+SpeI - ACTAGT	SpeI
+SphI - GCATGC	SphI
+SspI - AATATT	SspI
+StuI - AGGCCT	StuI
+StyI - CC(A/T)(A/T)GG	StyI
+TaqI - TCGA	TaqI
+Tru9I - TTAA	Tru9I
+Tth111I - GACNNNGTC	Tth111I
+VspI - ATTAAT	VspI
+XbaI - TCTAGA	XbaI
+XhoI - CTCGAG	XhoI
+XhoII - (A/G)GATC(C/T)	XhoII
+XmaI - CCCGGG	XmaI
+XmnI - GAANNNNTTC	XmnI
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/tool-data/gd.snps.loc.sample	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,3 @@
+#<species>  <SNP_call_file_path>
+#bighorn	/galaxy/local_data/genome_diversity/snps/bighorn_snps.txt
+#tasmanian_devil	/galaxy/local_data/genome_diversity/snps/devil_snps.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/tool-data/gd.species.txt.sample	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,6 @@
+# genome diversity species
+aye-aye aye-aye
+bear bear
+bighorn bighorn
+tasmanian_devil Tasmanian devil
+tick tick
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/tool_dependencies.xml	Thu Sep 19 16:40:24 2013 -0400
@@ -0,0 +1,36 @@
+<?xml version="1.0"?>
+<tool_dependency>
+  <package name="beautifulsoup" version="3.2.1">
+    <repository prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu/" owner="rico" name="package_beautifulsoup_3_2_1" changeset_revision="d0a48ff511ae" />
+  </package>
+  <package name="eigensoft" version="5.0.1">
+    <repository prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu/" owner="rico" name="package_eigensoft_5_0_1" changeset_revision="a2d0066a40f7" />
+  </package>
+  <package name="fisher" version="0.1.4">
+    <repository prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu/" owner="rico" name="package_fisher_0_1_4" changeset_revision="e281bf13e9ea" />
+  </package>
+  <package name="gd_c_tools" version="0.1">
+    <repository prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu/" owner="rico" name="package_gd_c_tools_0_1" changeset_revision="a06e8f745548" />
+  </package>
+  <package name="matplotlib" version="1.2.1">
+    <repository prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu/" owner="iuc" name="package_matplotlib_1_2" changeset_revision="966f29c955b9" />
+  </package>
+  <package name="mechanize" version="0.2.5">
+    <repository prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu/" owner="rico" name="package_mechanize_0_2_5" changeset_revision="b1f07eb32d69" />
+  </package>
+  <package name="munkres" version="1.0.5.4">
+    <repository prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu/" owner="rico" name="package_munkres_1_0_5_4" changeset_revision="c7fb72de0a20" />
+  </package>
+  <package name="networkx" version="1.8.1">
+    <repository prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu/" owner="rico" name="package_networkx_1_8_1" changeset_revision="db90f8ff8df8" />
+  </package>
+  <package name="phast" version="1.3">
+    <repository prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu/" owner="rico" name="package_phast_1_3" changeset_revision="0e3a2624a036" />
+  </package>
+  <package name="quicktree" version="1.1">
+    <repository prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu/" owner="rico" name="package_quicktree_1_1" changeset_revision="814402743a7f" />
+  </package>
+  <package name="raxml" version="7.7.6">
+    <repository prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu/" owner="rico" name="package_raxml_7_7_6" changeset_revision="ca433cb8c74f" />
+  </package>
+</tool_dependency>