Mercurial > repos > rico > all_but_bad
changeset 0:879cbc69a0ee default tip
Uploaded
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BeautifulSoup.py Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,2014 @@ +"""Beautiful Soup +Elixir and Tonic +"The Screen-Scraper's Friend" +http://www.crummy.com/software/BeautifulSoup/ + +Beautiful Soup parses a (possibly invalid) XML or HTML document into a +tree representation. It provides methods and Pythonic idioms that make +it easy to navigate, search, and modify the tree. + +A well-formed XML/HTML document yields a well-formed data +structure. An ill-formed XML/HTML document yields a correspondingly +ill-formed data structure. If your document is only locally +well-formed, you can use this library to find and process the +well-formed part of it. + +Beautiful Soup works with Python 2.2 and up. It has no external +dependencies, but you'll have more success at converting data to UTF-8 +if you also install these three packages: + +* chardet, for auto-detecting character encodings + http://chardet.feedparser.org/ +* cjkcodecs and iconv_codec, which add more encodings to the ones supported + by stock Python. + http://cjkpython.i18n.org/ + +Beautiful Soup defines classes for two main parsing strategies: + + * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific + language that kind of looks like XML. + + * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid + or invalid. This class has web browser-like heuristics for + obtaining a sensible parse tree in the face of common HTML errors. + +Beautiful Soup also defines a class (UnicodeDammit) for autodetecting +the encoding of an HTML or XML document, and converting it to +Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser. + +For more than you ever wanted to know about Beautiful Soup, see the +documentation: +http://www.crummy.com/software/BeautifulSoup/documentation.html + +Here, have some legalese: + +Copyright (c) 2004-2010, Leonard Richardson + +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the the Beautiful Soup Consortium and All + Night Kosher Bakery nor the names of its contributors may be + used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT. 
+ +""" +from __future__ import generators + +__author__ = "Leonard Richardson (leonardr@segfault.org)" +__version__ = "3.2.0" +__copyright__ = "Copyright (c) 2004-2010 Leonard Richardson" +__license__ = "New-style BSD" + +from sgmllib import SGMLParser, SGMLParseError +import codecs +import markupbase +import types +import re +import sgmllib +try: + from htmlentitydefs import name2codepoint +except ImportError: + name2codepoint = {} +try: + set +except NameError: + from sets import Set as set + +#These hacks make Beautiful Soup able to parse XML with namespaces +sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') +markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match + +DEFAULT_OUTPUT_ENCODING = "utf-8" + +def _match_css_class(str): + """Build a RE to match the given CSS class.""" + return re.compile(r"(^|.*\s)%s($|\s)" % str) + +# First, the classes that represent markup elements. + +class PageElement(object): + """Contains the navigational information for some part of the page + (either a tag or a piece of text)""" + + def setup(self, parent=None, previous=None): + """Sets up the initial relations between this element and + other elements.""" + self.parent = parent + self.previous = previous + self.next = None + self.previousSibling = None + self.nextSibling = None + if self.parent and self.parent.contents: + self.previousSibling = self.parent.contents[-1] + self.previousSibling.nextSibling = self + + def replaceWith(self, replaceWith): + oldParent = self.parent + myIndex = self.parent.index(self) + if hasattr(replaceWith, "parent")\ + and replaceWith.parent is self.parent: + # We're replacing this element with one of its siblings. + index = replaceWith.parent.index(replaceWith) + if index and index < myIndex: + # Furthermore, it comes before this element. That + # means that when we extract it, the index of this + # element will change. 
+ myIndex = myIndex - 1 + self.extract() + oldParent.insert(myIndex, replaceWith) + + def replaceWithChildren(self): + myParent = self.parent + myIndex = self.parent.index(self) + self.extract() + reversedChildren = list(self.contents) + reversedChildren.reverse() + for child in reversedChildren: + myParent.insert(myIndex, child) + + def extract(self): + """Destructively rips this element out of the tree.""" + if self.parent: + try: + del self.parent.contents[self.parent.index(self)] + except ValueError: + pass + + #Find the two elements that would be next to each other if + #this element (and any children) hadn't been parsed. Connect + #the two. + lastChild = self._lastRecursiveChild() + nextElement = lastChild.next + + if self.previous: + self.previous.next = nextElement + if nextElement: + nextElement.previous = self.previous + self.previous = None + lastChild.next = None + + self.parent = None + if self.previousSibling: + self.previousSibling.nextSibling = self.nextSibling + if self.nextSibling: + self.nextSibling.previousSibling = self.previousSibling + self.previousSibling = self.nextSibling = None + return self + + def _lastRecursiveChild(self): + "Finds the last element beneath this object to be parsed." + lastChild = self + while hasattr(lastChild, 'contents') and lastChild.contents: + lastChild = lastChild.contents[-1] + return lastChild + + def insert(self, position, newChild): + if isinstance(newChild, basestring) \ + and not isinstance(newChild, NavigableString): + newChild = NavigableString(newChild) + + position = min(position, len(self.contents)) + if hasattr(newChild, 'parent') and newChild.parent is not None: + # We're 'inserting' an element that's already one + # of this object's children. + if newChild.parent is self: + index = self.index(newChild) + if index > position: + # Furthermore we're moving it further down the + # list of this object's children. That means that + # when we extract this element, our target index + # will jump down one. 
+ position = position - 1 + newChild.extract() + + newChild.parent = self + previousChild = None + if position == 0: + newChild.previousSibling = None + newChild.previous = self + else: + previousChild = self.contents[position-1] + newChild.previousSibling = previousChild + newChild.previousSibling.nextSibling = newChild + newChild.previous = previousChild._lastRecursiveChild() + if newChild.previous: + newChild.previous.next = newChild + + newChildsLastElement = newChild._lastRecursiveChild() + + if position >= len(self.contents): + newChild.nextSibling = None + + parent = self + parentsNextSibling = None + while not parentsNextSibling: + parentsNextSibling = parent.nextSibling + parent = parent.parent + if not parent: # This is the last element in the document. + break + if parentsNextSibling: + newChildsLastElement.next = parentsNextSibling + else: + newChildsLastElement.next = None + else: + nextChild = self.contents[position] + newChild.nextSibling = nextChild + if newChild.nextSibling: + newChild.nextSibling.previousSibling = newChild + newChildsLastElement.next = nextChild + + if newChildsLastElement.next: + newChildsLastElement.next.previous = newChildsLastElement + self.contents.insert(position, newChild) + + def append(self, tag): + """Appends the given tag to the contents of this tag.""" + self.insert(len(self.contents), tag) + + def findNext(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears after this Tag in the document.""" + return self._findOne(self.findAllNext, name, attrs, text, **kwargs) + + def findAllNext(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.nextGenerator, + **kwargs) + + def findNextSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches 
the + given criteria and appears after this Tag in the document.""" + return self._findOne(self.findNextSiblings, name, attrs, text, + **kwargs) + + def findNextSiblings(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.nextSiblingGenerator, **kwargs) + fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x + + def findPrevious(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears before this Tag in the document.""" + return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs) + + def findAllPrevious(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.previousGenerator, + **kwargs) + fetchPrevious = findAllPrevious # Compatibility with pre-3.x + + def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears before this Tag in the document.""" + return self._findOne(self.findPreviousSiblings, name, attrs, text, + **kwargs) + + def findPreviousSiblings(self, name=None, attrs={}, text=None, + limit=None, **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.previousSiblingGenerator, **kwargs) + fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x + + def findParent(self, name=None, attrs={}, **kwargs): + """Returns the closest parent of this Tag that matches the given + criteria.""" + # NOTE: We can't use _findOne because findParents takes a different + # set of arguments. 
+ r = None + l = self.findParents(name, attrs, 1) + if l: + r = l[0] + return r + + def findParents(self, name=None, attrs={}, limit=None, **kwargs): + """Returns the parents of this Tag that match the given + criteria.""" + + return self._findAll(name, attrs, None, limit, self.parentGenerator, + **kwargs) + fetchParents = findParents # Compatibility with pre-3.x + + #These methods do the real heavy lifting. + + def _findOne(self, method, name, attrs, text, **kwargs): + r = None + l = method(name, attrs, text, 1, **kwargs) + if l: + r = l[0] + return r + + def _findAll(self, name, attrs, text, limit, generator, **kwargs): + "Iterates over a generator looking for things that match." + + if isinstance(name, SoupStrainer): + strainer = name + # (Possibly) special case some findAll*(...) searches + elif text is None and not limit and not attrs and not kwargs: + # findAll*(True) + if name is True: + return [element for element in generator() + if isinstance(element, Tag)] + # findAll*('tag-name') + elif isinstance(name, basestring): + return [element for element in generator() + if isinstance(element, Tag) and + element.name == name] + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + # Build a SoupStrainer + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + results = ResultSet(strainer) + g = generator() + while True: + try: + i = g.next() + except StopIteration: + break + if i: + found = strainer.search(i) + if found: + results.append(found) + if limit and len(results) >= limit: + break + return results + + #These Generators can be used to navigate starting from both + #NavigableStrings and Tags. 
+ def nextGenerator(self): + i = self + while i is not None: + i = i.next + yield i + + def nextSiblingGenerator(self): + i = self + while i is not None: + i = i.nextSibling + yield i + + def previousGenerator(self): + i = self + while i is not None: + i = i.previous + yield i + + def previousSiblingGenerator(self): + i = self + while i is not None: + i = i.previousSibling + yield i + + def parentGenerator(self): + i = self + while i is not None: + i = i.parent + yield i + + # Utility methods + def substituteEncoding(self, str, encoding=None): + encoding = encoding or "utf-8" + return str.replace("%SOUP-ENCODING%", encoding) + + def toEncoding(self, s, encoding=None): + """Encodes an object to a string in some encoding, or to Unicode. + .""" + if isinstance(s, unicode): + if encoding: + s = s.encode(encoding) + elif isinstance(s, str): + if encoding: + s = s.encode(encoding) + else: + s = unicode(s) + else: + if encoding: + s = self.toEncoding(str(s), encoding) + else: + s = unicode(s) + return s + +class NavigableString(unicode, PageElement): + + def __new__(cls, value): + """Create a new NavigableString. + + When unpickling a NavigableString, this method is called with + the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be + passed in to the superclass's __new__ or the superclass won't know + how to handle non-ASCII characters. + """ + if isinstance(value, unicode): + return unicode.__new__(cls, value) + return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) + + def __getnewargs__(self): + return (NavigableString.__str__(self),) + + def __getattr__(self, attr): + """text.string gives you text. 
This is for backwards + compatibility for Navigable*String, but for CData* it lets you + get the string without the CData wrapper.""" + if attr == 'string': + return self + else: + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) + + def __unicode__(self): + return str(self).decode(DEFAULT_OUTPUT_ENCODING) + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + if encoding: + return self.encode(encoding) + else: + return self + +class CData(NavigableString): + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "<![CDATA[%s]]>" % NavigableString.__str__(self, encoding) + +class ProcessingInstruction(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + output = self + if "%SOUP-ENCODING%" in output: + output = self.substituteEncoding(output, encoding) + return "<?%s?>" % self.toEncoding(output, encoding) + +class Comment(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "<!--%s-->" % NavigableString.__str__(self, encoding) + +class Declaration(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "<!%s>" % NavigableString.__str__(self, encoding) + +class Tag(PageElement): + + """Represents a found HTML tag with its attributes and contents.""" + + def _invert(h): + "Cheap function to invert a hash." + i = {} + for k,v in h.items(): + i[v] = k + return i + + XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'", + "quot" : '"', + "amp" : "&", + "lt" : "<", + "gt" : ">" } + + XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS) + + def _convertEntities(self, match): + """Used in a call to re.sub to replace HTML, XML, and numeric + entities with the appropriate Unicode characters. 
If HTML + entities are being converted, any unrecognized entities are + escaped.""" + x = match.group(1) + if self.convertHTMLEntities and x in name2codepoint: + return unichr(name2codepoint[x]) + elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS: + if self.convertXMLEntities: + return self.XML_ENTITIES_TO_SPECIAL_CHARS[x] + else: + return u'&%s;' % x + elif len(x) > 0 and x[0] == '#': + # Handle numeric entities + if len(x) > 1 and x[1] == 'x': + return unichr(int(x[2:], 16)) + else: + return unichr(int(x[1:])) + + elif self.escapeUnrecognizedEntities: + return u'&%s;' % x + else: + return u'&%s;' % x + + def __init__(self, parser, name, attrs=None, parent=None, + previous=None): + "Basic constructor." + + # We don't actually store the parser object: that lets extracted + # chunks be garbage-collected + self.parserClass = parser.__class__ + self.isSelfClosing = parser.isSelfClosingTag(name) + self.name = name + if attrs is None: + attrs = [] + elif isinstance(attrs, dict): + attrs = attrs.items() + self.attrs = attrs + self.contents = [] + self.setup(parent, previous) + self.hidden = False + self.containsSubstitutions = False + self.convertHTMLEntities = parser.convertHTMLEntities + self.convertXMLEntities = parser.convertXMLEntities + self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities + + # Convert any HTML, XML, or numeric entities in the attribute values. 
+ convert = lambda(k, val): (k, + re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", + self._convertEntities, + val)) + self.attrs = map(convert, self.attrs) + + def getString(self): + if (len(self.contents) == 1 + and isinstance(self.contents[0], NavigableString)): + return self.contents[0] + + def setString(self, string): + """Replace the contents of the tag with a string""" + self.clear() + self.append(string) + + string = property(getString, setString) + + def getText(self, separator=u""): + if not len(self.contents): + return u"" + stopNode = self._lastRecursiveChild().next + strings = [] + current = self.contents[0] + while current is not stopNode: + if isinstance(current, NavigableString): + strings.append(current.strip()) + current = current.next + return separator.join(strings) + + text = property(getText) + + def get(self, key, default=None): + """Returns the value of the 'key' attribute for the tag, or + the value given for 'default' if it doesn't have that + attribute.""" + return self._getAttrMap().get(key, default) + + def clear(self): + """Extract all children.""" + for child in self.contents[:]: + child.extract() + + def index(self, element): + for i, child in enumerate(self.contents): + if child is element: + return i + raise ValueError("Tag.index: element not in tag") + + def has_key(self, key): + return self._getAttrMap().has_key(key) + + def __getitem__(self, key): + """tag[key] returns the value of the 'key' attribute for the tag, + and throws an exception if it's not there.""" + return self._getAttrMap()[key] + + def __iter__(self): + "Iterating over a tag iterates over its contents." + return iter(self.contents) + + def __len__(self): + "The length of a tag is the length of its list of contents." + return len(self.contents) + + def __contains__(self, x): + return x in self.contents + + def __nonzero__(self): + "A tag is non-None even if it has no contents." 
+ return True + + def __setitem__(self, key, value): + """Setting tag[key] sets the value of the 'key' attribute for the + tag.""" + self._getAttrMap() + self.attrMap[key] = value + found = False + for i in range(0, len(self.attrs)): + if self.attrs[i][0] == key: + self.attrs[i] = (key, value) + found = True + if not found: + self.attrs.append((key, value)) + self._getAttrMap()[key] = value + + def __delitem__(self, key): + "Deleting tag[key] deletes all 'key' attributes for the tag." + for item in self.attrs: + if item[0] == key: + self.attrs.remove(item) + #We don't break because bad HTML can define the same + #attribute multiple times. + self._getAttrMap() + if self.attrMap.has_key(key): + del self.attrMap[key] + + def __call__(self, *args, **kwargs): + """Calling a tag like a function is the same as calling its + findAll() method. Eg. tag('a') returns a list of all the A tags + found within this tag.""" + return apply(self.findAll, args, kwargs) + + def __getattr__(self, tag): + #print "Getattr %s.%s" % (self.__class__, tag) + if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: + return self.find(tag[:-3]) + elif tag.find('__') != 0: + return self.find(tag) + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag) + + def __eq__(self, other): + """Returns true iff this tag has the same name, the same attributes, + and the same contents (recursively) as the given tag. + + NOTE: right now this will return false if two tags have the + same attributes in a different order. 
Should this be fixed?""" + if other is self: + return True + if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): + return False + for i in range(0, len(self.contents)): + if self.contents[i] != other.contents[i]: + return False + return True + + def __ne__(self, other): + """Returns true iff this tag is not identical to the other tag, + as defined in __eq__.""" + return not self == other + + def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING): + """Renders this tag as a string.""" + return self.__str__(encoding) + + def __unicode__(self): + return self.__str__(None) + + BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" + + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" + + ")") + + def _sub_entity(self, x): + """Used with a regular expression to substitute the + appropriate XML entity for an XML special character.""" + return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";" + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Returns a string or Unicode representation of this tag and + its contents. To get Unicode, pass None for encoding. + + NOTE: since Python's HTML parser consumes whitespace, this + method is not certain to reproduce the whitespace present in + the original string.""" + + encodedName = self.toEncoding(self.name, encoding) + + attrs = [] + if self.attrs: + for key, val in self.attrs: + fmt = '%s="%s"' + if isinstance(val, basestring): + if self.containsSubstitutions and '%SOUP-ENCODING%' in val: + val = self.substituteEncoding(val, encoding) + + # The attribute value either: + # + # * Contains no embedded double quotes or single quotes. + # No problem: we enclose it in double quotes. + # * Contains embedded single quotes. No problem: + # double quotes work here too. + # * Contains embedded double quotes. No problem: + # we enclose it in single quotes. 
+ # * Embeds both single _and_ double quotes. This + # can't happen naturally, but it can happen if + # you modify an attribute value after parsing + # the document. Now we have a bit of a + # problem. We solve it by enclosing the + # attribute in single quotes, and escaping any + # embedded single quotes to XML entities. + if '"' in val: + fmt = "%s='%s'" + if "'" in val: + # TODO: replace with apos when + # appropriate. + val = val.replace("'", "&squot;") + + # Now we're okay w/r/t quotes. But the attribute + # value might also contain angle brackets, or + # ampersands that aren't part of entities. We need + # to escape those to XML entities too. + val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val) + + attrs.append(fmt % (self.toEncoding(key, encoding), + self.toEncoding(val, encoding))) + close = '' + closeTag = '' + if self.isSelfClosing: + close = ' /' + else: + closeTag = '</%s>' % encodedName + + indentTag, indentContents = 0, 0 + if prettyPrint: + indentTag = indentLevel + space = (' ' * (indentTag-1)) + indentContents = indentTag + 1 + contents = self.renderContents(encoding, prettyPrint, indentContents) + if self.hidden: + s = contents + else: + s = [] + attributeString = '' + if attrs: + attributeString = ' ' + ' '.join(attrs) + if prettyPrint: + s.append(space) + s.append('<%s%s%s>' % (encodedName, attributeString, close)) + if prettyPrint: + s.append("\n") + s.append(contents) + if prettyPrint and contents and contents[-1] != "\n": + s.append("\n") + if prettyPrint and closeTag: + s.append(space) + s.append(closeTag) + if prettyPrint and closeTag and self.nextSibling: + s.append("\n") + s = ''.join(s) + return s + + def decompose(self): + """Recursively destroys the contents of this tree.""" + self.extract() + if len(self.contents) == 0: + return + current = self.contents[0] + while current is not None: + next = current.next + if isinstance(current, Tag): + del current.contents[:] + current.parent = None + current.previous = None + 
current.previousSibling = None + current.next = None + current.nextSibling = None + current = next + + def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING): + return self.__str__(encoding, True) + + def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Renders the contents of this tag as a string in the given + encoding. If encoding is None, returns a Unicode string..""" + s=[] + for c in self: + text = None + if isinstance(c, NavigableString): + text = c.__str__(encoding) + elif isinstance(c, Tag): + s.append(c.__str__(encoding, prettyPrint, indentLevel)) + if text and prettyPrint: + text = text.strip() + if text: + if prettyPrint: + s.append(" " * (indentLevel-1)) + s.append(text) + if prettyPrint: + s.append("\n") + return ''.join(s) + + #Soup methods + + def find(self, name=None, attrs={}, recursive=True, text=None, + **kwargs): + """Return only the first child of this Tag matching the given + criteria.""" + r = None + l = self.findAll(name, attrs, recursive, text, 1, **kwargs) + if l: + r = l[0] + return r + findChild = find + + def findAll(self, name=None, attrs={}, recursive=True, text=None, + limit=None, **kwargs): + """Extracts a list of Tag objects that match the given + criteria. You can specify the name of the Tag and any + attributes you want the Tag to have. + + The value of a key-value pair in the 'attrs' map can be a + string, a list of strings, a regular expression object, or a + callable that takes a string and returns whether or not the + string matches for some custom definition of 'matches'. 
The + same is true of the tag name.""" + generator = self.recursiveChildGenerator + if not recursive: + generator = self.childGenerator + return self._findAll(name, attrs, text, limit, generator, **kwargs) + findChildren = findAll + + # Pre-3.x compatibility methods + first = find + fetch = findAll + + def fetchText(self, text=None, recursive=True, limit=None): + return self.findAll(text=text, recursive=recursive, limit=limit) + + def firstText(self, text=None, recursive=True): + return self.find(text=text, recursive=recursive) + + #Private methods + + def _getAttrMap(self): + """Initializes a map representation of this tag's attributes, + if not already initialized.""" + if not getattr(self, 'attrMap'): + self.attrMap = {} + for (key, value) in self.attrs: + self.attrMap[key] = value + return self.attrMap + + #Generator methods + def childGenerator(self): + # Just use the iterator from the contents + return iter(self.contents) + + def recursiveChildGenerator(self): + if not len(self.contents): + raise StopIteration + stopNode = self._lastRecursiveChild().next + current = self.contents[0] + while current is not stopNode: + yield current + current = current.next + + +# Next, a couple classes to represent queries and their results. 
class SoupStrainer:
    """Encapsulates a number of ways of matching a markup element (tag or
    text)."""

    def __init__(self, name=None, attrs={}, text=None, **kwargs):
        """Stores the match criteria.

        A string passed as attrs is treated as a CSS class to match.
        Keyword arguments are merged into a copy of attrs."""
        self.name = name
        if isinstance(attrs, basestring):
            kwargs['class'] = _match_css_class(attrs)
            attrs = None
        if kwargs:
            if attrs:
                attrs = attrs.copy()
                attrs.update(kwargs)
            else:
                attrs = kwargs
        if attrs is None:
            # searchTag() iterates self.attrs.items(), so an explicit
            # attrs=None must still end up as a mapping.
            attrs = {}
        self.attrs = attrs
        self.text = text

    def __str__(self):
        if self.text:
            return self.text
        else:
            return "%s|%s" % (self.name, self.attrs)

    def searchTag(self, markupName=None, markupAttrs={}):
        """Returns the tag (or tag name) if it matches this strainer's
        name and attribute criteria, otherwise None.

        markupName may be a Tag, in which case the Tag itself supplies
        the attributes."""
        found = None
        markup = None
        if isinstance(markupName, Tag):
            markup = markupName
            markupAttrs = markup
        callFunctionWithTagData = callable(self.name) \
                                and not isinstance(markupName, Tag)

        if (not self.name) \
               or callFunctionWithTagData \
               or (markup and self._matches(markup, self.name)) \
               or (not markup and self._matches(markupName, self.name)):
            if callFunctionWithTagData:
                match = self.name(markupName, markupAttrs)
            else:
                match = True
                markupAttrMap = None
                for attr, matchAgainst in self.attrs.items():
                    if not markupAttrMap:
                        if hasattr(markupAttrs, 'get'):
                            markupAttrMap = markupAttrs
                        else:
                            markupAttrMap = {}
                            for k,v in markupAttrs:
                                markupAttrMap[k] = v
                    attrValue = markupAttrMap.get(attr)
                    if not self._matches(attrValue, matchAgainst):
                        match = False
                        break
            if match:
                if markup:
                    found = markup
                else:
                    found = markupName
        return found

    def search(self, markup):
        """Returns the part of markup that matches this strainer, or
        None. Raises Exception for markup of an unsupported type."""
        #print 'looking for %s in %s' % (self, markup)
        found = None
        # If given a list of items, scan it for a text element that
        # matches.
        if hasattr(markup, "__iter__") \
                and not isinstance(markup, Tag):
            for element in markup:
                if isinstance(element, NavigableString) \
                       and self.search(element):
                    found = element
                    break
        # If it's a Tag, make sure its name or attributes match.
        # Don't bother with Tags if we're searching for text.
        elif isinstance(markup, Tag):
            if not self.text:
                found = self.searchTag(markup)
        # If it's text, make sure the text matches.
        elif isinstance(markup, NavigableString) or \
                 isinstance(markup, basestring):
            if self._matches(markup, self.text):
                found = markup
        else:
            raise Exception("I don't know how to match against a %s" \
                  % markup.__class__)
        return found

    def _matches(self, markup, matchAgainst):
        """Tests markup against one criterion, which may be True, a
        callable, a regexp object, a list-like, or a plain value."""
        #print "Matching %s against %s" % (markup, matchAgainst)
        result = False
        if matchAgainst is True:
            result = markup is not None
        elif callable(matchAgainst):
            result = matchAgainst(markup)
        else:
            #Custom match methods take the tag as an argument, but all
            #other ways of matching match the tag name as a string.
            if isinstance(markup, Tag):
                markup = markup.name
            if markup and not isinstance(markup, basestring):
                markup = unicode(markup)
            #Now we know that chunk is either a string, or None.
            if hasattr(matchAgainst, 'match'):
                # It's a regexp object.
                result = markup and matchAgainst.search(markup)
            elif hasattr(matchAgainst, '__iter__'): # list-like
                result = markup in matchAgainst
            elif hasattr(matchAgainst, 'items'):
                result = markup.has_key(matchAgainst)
            elif matchAgainst and isinstance(markup, basestring):
                if isinstance(markup, unicode):
                    matchAgainst = unicode(matchAgainst)
                else:
                    matchAgainst = str(matchAgainst)

            if not result:
                result = matchAgainst == markup
        return result

class ResultSet(list):
    """A ResultSet is just a list that keeps track of the SoupStrainer
    that created it."""
    def __init__(self, source):
        # Initialise this list itself, not a throwaway temporary.
        list.__init__(self)
        self.source = source

# Now, some helper functions.

def buildTagMap(default, *args):
    """Turns a list of maps, lists, or scalars into a single map.
    Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and
    NESTING_RESET_TAGS maps out of lists and partial maps."""
    built = {}
    for portion in args:
        if hasattr(portion, 'items'):
            #It's a map. Merge it.
            for k,v in portion.items():
                built[k] = v
        elif hasattr(portion, '__iter__'): # is a list
            #It's a list. Map each item to the default.
            for k in portion:
                built[k] = default
        else:
            #It's a scalar. Map it to the default.
            built[portion] = default
    return built

# Now, the parser classes.

class BeautifulStoneSoup(Tag, SGMLParser):

    """This class contains the basic parser and search code. It defines
    a parser that knows nothing about tag behavior except for the
    following:

      You can't close a tag without closing all the tags it encloses.
      That is, "<foo><bar></foo>" actually means
      "<foo><bar></bar></foo>".

    [Another possible explanation is "<foo><bar /></foo>", but since
    this class defines no SELF_CLOSING_TAGS, it will never use that
    explanation.]

    This class is useful for parsing XML or made-up markup languages,
    or when BeautifulSoup makes an assumption counter to what you were
    expecting."""

    SELF_CLOSING_TAGS = {}
    NESTABLE_TAGS = {}
    RESET_NESTING_TAGS = {}
    QUOTE_TAGS = {}
    PRESERVE_WHITESPACE_TAGS = []

    MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'),
                       lambda x: x.group(1) + ' />'),
                      (re.compile('<!\s+([^<>]*)>'),
                       lambda x: '<!' + x.group(1) + '>')
                      ]

    ROOT_TAG_NAME = u'[document]'

    HTML_ENTITIES = "html"
    XML_ENTITIES = "xml"
    XHTML_ENTITIES = "xhtml"
    # TODO: This only exists for backwards-compatibility
    ALL_ENTITIES = XHTML_ENTITIES

    # Used when determining whether a text node is all whitespace and
    # can be replaced with a single space. A text node that contains
    # fancy Unicode spaces (usually non-breaking) should be left
    # alone.
+ STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, } + + def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None, + markupMassage=True, smartQuotesTo=XML_ENTITIES, + convertEntities=None, selfClosingTags=None, isHTML=False): + """The Soup object is initialized as the 'root tag', and the + provided markup (which can be a string or a file-like object) + is fed into the underlying parser. + + sgmllib will process most bad HTML, and the BeautifulSoup + class has some tricks for dealing with some HTML that kills + sgmllib, but Beautiful Soup can nonetheless choke or lose data + if your data uses self-closing tags or declarations + incorrectly. + + By default, Beautiful Soup uses regexes to sanitize input, + avoiding the vast majority of these problems. If the problems + don't apply to you, pass in False for markupMassage, and + you'll get better performance. + + The default parser massage techniques fix the two most common + instances of invalid HTML that choke sgmllib: + + <br/> (No space between name of closing tag and tag close) + <! --Comment--> (Extraneous whitespace in declaration) + + You can pass in a custom list of (RE object, replace method) + tuples to get Beautiful Soup to scrub your input the way you + want.""" + + self.parseOnlyThese = parseOnlyThese + self.fromEncoding = fromEncoding + self.smartQuotesTo = smartQuotesTo + self.convertEntities = convertEntities + # Set the rules for how we'll deal with the entities we + # encounter + if self.convertEntities: + # It doesn't make sense to convert encoded characters to + # entities even while you're converting entities to Unicode. + # Just convert it all to Unicode. 
+ self.smartQuotesTo = None + if convertEntities == self.HTML_ENTITIES: + self.convertXMLEntities = False + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = True + elif convertEntities == self.XHTML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = False + elif convertEntities == self.XML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + else: + self.convertXMLEntities = False + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + + self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags) + SGMLParser.__init__(self) + + if hasattr(markup, 'read'): # It's a file-type object. + markup = markup.read() + self.markup = markup + self.markupMassage = markupMassage + try: + self._feed(isHTML=isHTML) + except StopParsing: + pass + self.markup = None # The markup can now be GCed + + def convert_charref(self, name): + """This method fixes a bug in Python's SGMLParser.""" + try: + n = int(name) + except ValueError: + return + if not 0 <= n <= 127 : # ASCII ends at 127, not 255 + return + return self.convert_codepoint(n) + + def _feed(self, inDocumentEncoding=None, isHTML=False): + # Convert the document to Unicode. 
+ markup = self.markup + if isinstance(markup, unicode): + if not hasattr(self, 'originalEncoding'): + self.originalEncoding = None + else: + dammit = UnicodeDammit\ + (markup, [self.fromEncoding, inDocumentEncoding], + smartQuotesTo=self.smartQuotesTo, isHTML=isHTML) + markup = dammit.unicode + self.originalEncoding = dammit.originalEncoding + self.declaredHTMLEncoding = dammit.declaredHTMLEncoding + if markup: + if self.markupMassage: + if not hasattr(self.markupMassage, "__iter__"): + self.markupMassage = self.MARKUP_MASSAGE + for fix, m in self.markupMassage: + markup = fix.sub(m, markup) + # TODO: We get rid of markupMassage so that the + # soup object can be deepcopied later on. Some + # Python installations can't copy regexes. If anyone + # was relying on the existence of markupMassage, this + # might cause problems. + del(self.markupMassage) + self.reset() + + SGMLParser.feed(self, markup) + # Close out any unfinished strings and close all the open tags. + self.endData() + while self.currentTag.name != self.ROOT_TAG_NAME: + self.popTag() + + def __getattr__(self, methodName): + """This method routes method call requests to either the SGMLParser + superclass or the Tag superclass, depending on the method name.""" + #print "__getattr__ called on %s.%s" % (self.__class__, methodName) + + if methodName.startswith('start_') or methodName.startswith('end_') \ + or methodName.startswith('do_'): + return SGMLParser.__getattr__(self, methodName) + elif not methodName.startswith('__'): + return Tag.__getattr__(self, methodName) + else: + raise AttributeError + + def isSelfClosingTag(self, name): + """Returns true iff the given string is the name of a + self-closing tag according to this parser.""" + return self.SELF_CLOSING_TAGS.has_key(name) \ + or self.instanceSelfClosingTags.has_key(name) + + def reset(self): + Tag.__init__(self, self, self.ROOT_TAG_NAME) + self.hidden = 1 + SGMLParser.reset(self) + self.currentData = [] + self.currentTag = None + self.tagStack = 
[] + self.quoteStack = [] + self.pushTag(self) + + def popTag(self): + tag = self.tagStack.pop() + + #print "Pop", tag.name + if self.tagStack: + self.currentTag = self.tagStack[-1] + return self.currentTag + + def pushTag(self, tag): + #print "Push", tag.name + if self.currentTag: + self.currentTag.contents.append(tag) + self.tagStack.append(tag) + self.currentTag = self.tagStack[-1] + + def endData(self, containerClass=NavigableString): + if self.currentData: + currentData = u''.join(self.currentData) + if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and + not set([tag.name for tag in self.tagStack]).intersection( + self.PRESERVE_WHITESPACE_TAGS)): + if '\n' in currentData: + currentData = '\n' + else: + currentData = ' ' + self.currentData = [] + if self.parseOnlyThese and len(self.tagStack) <= 1 and \ + (not self.parseOnlyThese.text or \ + not self.parseOnlyThese.search(currentData)): + return + o = containerClass(currentData) + o.setup(self.currentTag, self.previous) + if self.previous: + self.previous.next = o + self.previous = o + self.currentTag.contents.append(o) + + + def _popToTag(self, name, inclusivePop=True): + """Pops the tag stack up to and including the most recent + instance of the given tag. 
If inclusivePop is false, pops the tag + stack up to but *not* including the most recent instqance of + the given tag.""" + #print "Popping to %s" % name + if name == self.ROOT_TAG_NAME: + return + + numPops = 0 + mostRecentTag = None + for i in range(len(self.tagStack)-1, 0, -1): + if name == self.tagStack[i].name: + numPops = len(self.tagStack)-i + break + if not inclusivePop: + numPops = numPops - 1 + + for i in range(0, numPops): + mostRecentTag = self.popTag() + return mostRecentTag + + def _smartPop(self, name): + + """We need to pop up to the previous tag of this type, unless + one of this tag's nesting reset triggers comes between this + tag and the previous tag of this type, OR unless this tag is a + generic nesting trigger and another generic nesting trigger + comes between this tag and the previous tag of this type. + + Examples: + <p>Foo<b>Bar *<p>* should pop to 'p', not 'b'. + <p>Foo<table>Bar *<p>* should pop to 'table', not 'p'. + <p>Foo<table><tr>Bar *<p>* should pop to 'tr', not 'p'. + + <li><ul><li> *<li>* should pop to 'ul', not the first 'li'. + <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr' + <td><tr><td> *<td>* should pop to 'tr', not the first 'td' + """ + + nestingResetTriggers = self.NESTABLE_TAGS.get(name) + isNestable = nestingResetTriggers != None + isResetNesting = self.RESET_NESTING_TAGS.has_key(name) + popTo = None + inclusive = True + for i in range(len(self.tagStack)-1, 0, -1): + p = self.tagStack[i] + if (not p or p.name == name) and not isNestable: + #Non-nestable tags get popped to the top or to their + #last occurance. + popTo = name + break + if (nestingResetTriggers is not None + and p.name in nestingResetTriggers) \ + or (nestingResetTriggers is None and isResetNesting + and self.RESET_NESTING_TAGS.has_key(p.name)): + + #If we encounter one of the nesting reset triggers + #peculiar to this tag, or we encounter another tag + #that causes nesting to reset, pop up to but not + #including that tag. 
+ popTo = p.name + inclusive = False + break + p = p.parent + if popTo: + self._popToTag(popTo, inclusive) + + def unknown_starttag(self, name, attrs, selfClosing=0): + #print "Start tag %s: %s" % (name, attrs) + if self.quoteStack: + #This is not a real tag. + #print "<%s> is not real!" % name + attrs = ''.join([' %s="%s"' % (x, y) for x, y in attrs]) + self.handle_data('<%s%s>' % (name, attrs)) + return + self.endData() + + if not self.isSelfClosingTag(name) and not selfClosing: + self._smartPop(name) + + if self.parseOnlyThese and len(self.tagStack) <= 1 \ + and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)): + return + + tag = Tag(self, name, attrs, self.currentTag, self.previous) + if self.previous: + self.previous.next = tag + self.previous = tag + self.pushTag(tag) + if selfClosing or self.isSelfClosingTag(name): + self.popTag() + if name in self.QUOTE_TAGS: + #print "Beginning quote (%s)" % name + self.quoteStack.append(name) + self.literal = 1 + return tag + + def unknown_endtag(self, name): + #print "End tag %s" % name + if self.quoteStack and self.quoteStack[-1] != name: + #This is not a real end tag. + #print "</%s> is not real!" 
% name + self.handle_data('</%s>' % name) + return + self.endData() + self._popToTag(name) + if self.quoteStack and self.quoteStack[-1] == name: + self.quoteStack.pop() + self.literal = (len(self.quoteStack) > 0) + + def handle_data(self, data): + self.currentData.append(data) + + def _toStringSubclass(self, text, subclass): + """Adds a certain piece of text to the tree as a NavigableString + subclass.""" + self.endData() + self.handle_data(text) + self.endData(subclass) + + def handle_pi(self, text): + """Handle a processing instruction as a ProcessingInstruction + object, possibly one with a %SOUP-ENCODING% slot into which an + encoding will be plugged later.""" + if text[:3] == "xml": + text = u"xml version='1.0' encoding='%SOUP-ENCODING%'" + self._toStringSubclass(text, ProcessingInstruction) + + def handle_comment(self, text): + "Handle comments as Comment objects." + self._toStringSubclass(text, Comment) + + def handle_charref(self, ref): + "Handle character references as data." + if self.convertEntities: + data = unichr(int(ref)) + else: + data = '&#%s;' % ref + self.handle_data(data) + + def handle_entityref(self, ref): + """Handle entity references as data, possibly converting known + HTML and/or XML entity references to the corresponding Unicode + characters.""" + data = None + if self.convertHTMLEntities: + try: + data = unichr(name2codepoint[ref]) + except KeyError: + pass + + if not data and self.convertXMLEntities: + data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref) + + if not data and self.convertHTMLEntities and \ + not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref): + # TODO: We've got a problem here. We're told this is + # an entity reference, but it's not an XML entity + # reference or an HTML entity reference. Nonetheless, + # the logical thing to do is to pass it through as an + # unrecognized entity reference. + # + # Except: when the input is "&carol;" this function + # will be called with input "carol". 
When the input is + # "AT&T", this function will be called with input + # "T". We have no way of knowing whether a semicolon + # was present originally, so we don't know whether + # this is an unknown entity or just a misplaced + # ampersand. + # + # The more common case is a misplaced ampersand, so I + # escape the ampersand and omit the trailing semicolon. + data = "&%s" % ref + if not data: + # This case is different from the one above, because we + # haven't already gone through a supposedly comprehensive + # mapping of entities to Unicode characters. We might not + # have gone through any mapping at all. So the chances are + # very high that this is a real entity, and not a + # misplaced ampersand. + data = "&%s;" % ref + self.handle_data(data) + + def handle_decl(self, data): + "Handle DOCTYPEs and the like as Declaration objects." + self._toStringSubclass(data, Declaration) + + def parse_declaration(self, i): + """Treat a bogus SGML declaration as raw data. Treat a CDATA + declaration as a CData object.""" + j = None + if self.rawdata[i:i+9] == '<![CDATA[': + k = self.rawdata.find(']]>', i) + if k == -1: + k = len(self.rawdata) + data = self.rawdata[i+9:k] + j = k+3 + self._toStringSubclass(data, CData) + else: + try: + j = SGMLParser.parse_declaration(self, i) + except SGMLParseError: + toHandle = self.rawdata[i:] + self.handle_data(toHandle) + j = i + len(toHandle) + return j + +class BeautifulSoup(BeautifulStoneSoup): + + """This parser knows the following facts about HTML: + + * Some tags have no closing tag and should be interpreted as being + closed as soon as they are encountered. + + * The text inside some tags (ie. 'script') may contain tags which + are not really part of the document and which should be parsed + as text, not tags. If you want to parse the text as tags, you can + always fetch it and parse it explicitly. + + * Tag nesting rules: + + Most tags can't be nested at all. 
For instance, the occurance of + a <p> tag should implicitly close the previous <p> tag. + + <p>Para1<p>Para2 + should be transformed into: + <p>Para1</p><p>Para2 + + Some tags can be nested arbitrarily. For instance, the occurance + of a <blockquote> tag should _not_ implicitly close the previous + <blockquote> tag. + + Alice said: <blockquote>Bob said: <blockquote>Blah + should NOT be transformed into: + Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah + + Some tags can be nested, but the nesting is reset by the + interposition of other tags. For instance, a <tr> tag should + implicitly close the previous <tr> tag within the same <table>, + but not close a <tr> tag in another table. + + <table><tr>Blah<tr>Blah + should be transformed into: + <table><tr>Blah</tr><tr>Blah + but, + <tr>Blah<table><tr>Blah + should NOT be transformed into + <tr>Blah<table></tr><tr>Blah + + Differing assumptions about tag nesting rules are a major source + of problems with the BeautifulSoup class. If BeautifulSoup is not + treating as nestable a tag your page author treats as nestable, + try ICantBelieveItsBeautifulSoup, MinimalSoup, or + BeautifulStoneSoup before writing your own subclass.""" + + def __init__(self, *args, **kwargs): + if not kwargs.has_key('smartQuotesTo'): + kwargs['smartQuotesTo'] = self.HTML_ENTITIES + kwargs['isHTML'] = True + BeautifulStoneSoup.__init__(self, *args, **kwargs) + + SELF_CLOSING_TAGS = buildTagMap(None, + ('br' , 'hr', 'input', 'img', 'meta', + 'spacer', 'link', 'frame', 'base', 'col')) + + PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea']) + + QUOTE_TAGS = {'script' : None, 'textarea' : None} + + #According to the HTML standard, each of these inline tags can + #contain another tag of the same type. Furthermore, it's common + #to actually use these tags this way. 
+ NESTABLE_INLINE_TAGS = ('span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', + 'center') + + #According to the HTML standard, these block tags can contain + #another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del') + + #Lists can contain other lists, but there are restrictions. + NESTABLE_LIST_TAGS = { 'ol' : [], + 'ul' : [], + 'li' : ['ul', 'ol'], + 'dl' : [], + 'dd' : ['dl'], + 'dt' : ['dl'] } + + #Tables can contain other tables, but there are restrictions. + NESTABLE_TABLE_TAGS = {'table' : [], + 'tr' : ['table', 'tbody', 'tfoot', 'thead'], + 'td' : ['tr'], + 'th' : ['tr'], + 'thead' : ['table'], + 'tbody' : ['table'], + 'tfoot' : ['table'], + } + + NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre') + + #If one of these tags is encountered, all tags up to the next tag of + #this type are popped. + RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', + NON_NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, + NESTABLE_TABLE_TAGS) + + NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) + + # Used to detect the charset in a META tag; see start_meta + CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) + + def start_meta(self, attrs): + """Beautiful Soup can detect a charset included in a META tag, + try to convert the document to that charset, and re-parse the + document from the beginning.""" + httpEquiv = None + contentType = None + contentTypeIndex = None + tagNeedsEncodingSubstitution = False + + for i in range(0, len(attrs)): + key, value = attrs[i] + key = key.lower() + if key == 'http-equiv': + httpEquiv = value + elif key == 'content': + contentType = value + contentTypeIndex = i + + if httpEquiv and contentType: # It's an interesting meta tag. 
+ match = self.CHARSET_RE.search(contentType) + if match: + if (self.declaredHTMLEncoding is not None or + self.originalEncoding == self.fromEncoding): + # An HTML encoding was sniffed while converting + # the document to Unicode, or an HTML encoding was + # sniffed during a previous pass through the + # document, or an encoding was specified + # explicitly and it worked. Rewrite the meta tag. + def rewrite(match): + return match.group(1) + "%SOUP-ENCODING%" + newAttr = self.CHARSET_RE.sub(rewrite, contentType) + attrs[contentTypeIndex] = (attrs[contentTypeIndex][0], + newAttr) + tagNeedsEncodingSubstitution = True + else: + # This is our first pass through the document. + # Go through it again with the encoding information. + newCharset = match.group(3) + if newCharset and newCharset != self.originalEncoding: + self.declaredHTMLEncoding = newCharset + self._feed(self.declaredHTMLEncoding) + raise StopParsing + pass + tag = self.unknown_starttag("meta", attrs) + if tag and tagNeedsEncodingSubstitution: + tag.containsSubstitutions = True + +class StopParsing(Exception): + pass + +class ICantBelieveItsBeautifulSoup(BeautifulSoup): + + """The BeautifulSoup class is oriented towards skipping over + common HTML errors like unclosed tags. However, sometimes it makes + errors of its own. For instance, consider this fragment: + + <b>Foo<b>Bar</b></b> + + This is perfectly valid (if bizarre) HTML. However, the + BeautifulSoup class will implicitly close the first b tag when it + encounters the second 'b'. It will think the author wrote + "<b>Foo<b>Bar", and didn't close the first 'b' tag, because + there's no real-world reason to bold something that's already + bold. When it encounters '</b></b>' it will close two more 'b' + tags, for a grand total of three tags closed instead of two. This + can throw off the rest of your document structure. The same is + true of a number of other tags, listed below. 
+ + It's much more common for someone to forget to close a 'b' tag + than to actually use nested 'b' tags, and the BeautifulSoup class + handles the common case. This class handles the not-co-common + case: where you can't believe someone wrote what they did, but + it's valid HTML and BeautifulSoup screwed up by assuming it + wouldn't be.""" + + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ + ('em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', + 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', + 'big') + + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ('noscript',) + + NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS) + +class MinimalSoup(BeautifulSoup): + """The MinimalSoup class is for parsing HTML that contains + pathologically bad markup. It makes no assumptions about tag + nesting, but it does know which tags are self-closing, that + <script> tags contain Javascript and should not be parsed, that + META tags may contain encoding information, and so on. + + This also makes it better for subclassing than BeautifulStoneSoup + or BeautifulSoup.""" + + RESET_NESTING_TAGS = buildTagMap('noscript') + NESTABLE_TAGS = {} + +class BeautifulSOAP(BeautifulStoneSoup): + """This class will push a tag with only a single string child into + the tag's parent as an attribute. The attribute's name is the tag + name, and the value is the string child. An example should give + the flavor of the change: + + <foo><bar>baz</bar></foo> + => + <foo bar="baz"><bar>baz</bar></foo> + + You can then access fooTag['bar'] instead of fooTag.barTag.string. + + This is, of course, useful for scraping structures that tend to + use subelements instead of attributes, such as SOAP messages. Note + that it modifies its input, so don't print the modified version + out. + + I'm not sure how many people really want to use this class; let me + know if you do. 
Mainly I like the name.""" + + def popTag(self): + if len(self.tagStack) > 1: + tag = self.tagStack[-1] + parent = self.tagStack[-2] + parent._getAttrMap() + if (isinstance(tag, Tag) and len(tag.contents) == 1 and + isinstance(tag.contents[0], NavigableString) and + not parent.attrMap.has_key(tag.name)): + parent[tag.name] = tag.contents[0] + BeautifulStoneSoup.popTag(self) + +#Enterprise class names! It has come to our attention that some people +#think the names of the Beautiful Soup parser classes are too silly +#and "unprofessional" for use in enterprise screen-scraping. We feel +#your pain! For such-minded folk, the Beautiful Soup Consortium And +#All-Night Kosher Bakery recommends renaming this file to +#"RobustParser.py" (or, in cases of extreme enterprisiness, +#"RobustParserBeanInterface.class") and using the following +#enterprise-friendly class aliases: +class RobustXMLParser(BeautifulStoneSoup): + pass +class RobustHTMLParser(BeautifulSoup): + pass +class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup): + pass +class RobustInsanelyWackAssHTMLParser(MinimalSoup): + pass +class SimplifyingSOAPParser(BeautifulSOAP): + pass + +###################################################### +# +# Bonus library: Unicode, Dammit +# +# This class forces XML data into a standard format (usually to UTF-8 +# or Unicode). It is heavily based on code from Mark Pilgrim's +# Universal Feed Parser. It does not rewrite the XML or HTML to +# reflect a new encoding: that happens in BeautifulStoneSoup.handle_pi +# (XML) and BeautifulSoup.start_meta (HTML). + +# Autodetects character encodings. +# Download from http://chardet.feedparser.org/ +try: + import chardet +# import chardet.constants +# chardet.constants._debug = 1 +except ImportError: + chardet = None + +# cjkcodecs and iconv_codec make Python know about more character encodings. +# Both are available from http://cjkpython.i18n.org/ +# They're built in if you use Python 2.4. 
+try: + import cjkcodecs.aliases +except ImportError: + pass +try: + import iconv_codec +except ImportError: + pass + +class UnicodeDammit: + """A class for detecting the encoding of a *ML document and + converting it to a Unicode string. If the source encoding is + windows-1252, can replace MS smart quotes with their HTML or XML + equivalents.""" + + # This dictionary maps commonly seen values for "charset" in HTML + # meta tags to the corresponding Python codec names. It only covers + # values that aren't in Python's aliases and can't be determined + # by the heuristics in find_codec. + CHARSET_ALIASES = { "macintosh" : "mac-roman", + "x-sjis" : "shift-jis" } + + def __init__(self, markup, overrideEncodings=[], + smartQuotesTo='xml', isHTML=False): + self.declaredHTMLEncoding = None + self.markup, documentEncoding, sniffedEncoding = \ + self._detectEncoding(markup, isHTML) + self.smartQuotesTo = smartQuotesTo + self.triedEncodings = [] + if markup == '' or isinstance(markup, unicode): + self.originalEncoding = None + self.unicode = unicode(markup) + return + + u = None + for proposedEncoding in overrideEncodings: + u = self._convertFrom(proposedEncoding) + if u: break + if not u: + for proposedEncoding in (documentEncoding, sniffedEncoding): + u = self._convertFrom(proposedEncoding) + if u: break + + # If no luck and we have auto-detection library, try that: + if not u and chardet and not isinstance(self.markup, unicode): + u = self._convertFrom(chardet.detect(self.markup)['encoding']) + + # As a last resort, try utf-8 and windows-1252: + if not u: + for proposed_encoding in ("utf-8", "windows-1252"): + u = self._convertFrom(proposed_encoding) + if u: break + + self.unicode = u + if not u: self.originalEncoding = None + + def _subMSChar(self, orig): + """Changes a MS smart quote character to an XML or HTML + entity.""" + sub = self.MS_CHARS.get(orig) + if isinstance(sub, tuple): + if self.smartQuotesTo == 'xml': + sub = '&#x%s;' % sub[1] + else: + sub = '&%s;' % 
sub[0] + return sub + + def _convertFrom(self, proposed): + proposed = self.find_codec(proposed) + if not proposed or proposed in self.triedEncodings: + return None + self.triedEncodings.append(proposed) + markup = self.markup + + # Convert smart quotes to HTML if coming from an encoding + # that might have them. + if self.smartQuotesTo and proposed.lower() in("windows-1252", + "iso-8859-1", + "iso-8859-2"): + markup = re.compile("([\x80-\x9f])").sub \ + (lambda(x): self._subMSChar(x.group(1)), + markup) + + try: + # print "Trying to convert document to %s" % proposed + u = self._toUnicode(markup, proposed) + self.markup = u + self.originalEncoding = proposed + except Exception, e: + # print "That didn't work!" + # print e + return None + #print "Correct encoding: %s" % proposed + return self.markup + + def _toUnicode(self, data, encoding): + '''Given a string and its encoding, decodes the string into Unicode. + %encoding is a string recognized by encodings.aliases''' + + # strip Byte Order Mark (if present) + if (len(data) >= 4) and (data[:2] == '\xfe\xff') \ + and (data[2:4] != '\x00\x00'): + encoding = 'utf-16be' + data = data[2:] + elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \ + and (data[2:4] != '\x00\x00'): + encoding = 'utf-16le' + data = data[2:] + elif data[:3] == '\xef\xbb\xbf': + encoding = 'utf-8' + data = data[3:] + elif data[:4] == '\x00\x00\xfe\xff': + encoding = 'utf-32be' + data = data[4:] + elif data[:4] == '\xff\xfe\x00\x00': + encoding = 'utf-32le' + data = data[4:] + newdata = unicode(data, encoding) + return newdata + + def _detectEncoding(self, xml_data, isHTML=False): + """Given a document, tries to detect its XML encoding.""" + xml_encoding = sniffed_xml_encoding = None + try: + if xml_data[:4] == '\x4c\x6f\xa7\x94': + # EBCDIC + xml_data = self._ebcdic_to_ascii(xml_data) + elif xml_data[:4] == '\x00\x3c\x00\x3f': + # UTF-16BE + sniffed_xml_encoding = 'utf-16be' + xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') + elif 
(len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \ + and (xml_data[2:4] != '\x00\x00'): + # UTF-16BE with BOM + sniffed_xml_encoding = 'utf-16be' + xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') + elif xml_data[:4] == '\x3c\x00\x3f\x00': + # UTF-16LE + sniffed_xml_encoding = 'utf-16le' + xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') + elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \ + (xml_data[2:4] != '\x00\x00'): + # UTF-16LE with BOM + sniffed_xml_encoding = 'utf-16le' + xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') + elif xml_data[:4] == '\x00\x00\x00\x3c': + # UTF-32BE + sniffed_xml_encoding = 'utf-32be' + xml_data = unicode(xml_data, 'utf-32be').encode('utf-8') + elif xml_data[:4] == '\x3c\x00\x00\x00': + # UTF-32LE + sniffed_xml_encoding = 'utf-32le' + xml_data = unicode(xml_data, 'utf-32le').encode('utf-8') + elif xml_data[:4] == '\x00\x00\xfe\xff': + # UTF-32BE with BOM + sniffed_xml_encoding = 'utf-32be' + xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8') + elif xml_data[:4] == '\xff\xfe\x00\x00': + # UTF-32LE with BOM + sniffed_xml_encoding = 'utf-32le' + xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8') + elif xml_data[:3] == '\xef\xbb\xbf': + # UTF-8 with BOM + sniffed_xml_encoding = 'utf-8' + xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8') + else: + sniffed_xml_encoding = 'ascii' + pass + except: + xml_encoding_match = None + xml_encoding_match = re.compile( + '^<\?.*encoding=[\'"](.*?)[\'"].*\?>').match(xml_data) + if not xml_encoding_match and isHTML: + regexp = re.compile('<\s*meta[^>]+charset=([^>]*?)[;\'">]', re.I) + xml_encoding_match = regexp.search(xml_data) + if xml_encoding_match is not None: + xml_encoding = xml_encoding_match.groups()[0].lower() + if isHTML: + self.declaredHTMLEncoding = xml_encoding + if sniffed_xml_encoding and \ + (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode', + 'iso-10646-ucs-4', 'ucs-4', 'csucs4', + 'utf-16', 
'utf-32', 'utf_16', 'utf_32', + 'utf16', 'u16')): + xml_encoding = sniffed_xml_encoding + return xml_data, xml_encoding, sniffed_xml_encoding + + + def find_codec(self, charset): + return self._codec(self.CHARSET_ALIASES.get(charset, charset)) \ + or (charset and self._codec(charset.replace("-", ""))) \ + or (charset and self._codec(charset.replace("-", "_"))) \ + or charset + + def _codec(self, charset): + if not charset: return charset + codec = None + try: + codecs.lookup(charset) + codec = charset + except (LookupError, ValueError): + pass + return codec + + EBCDIC_TO_ASCII_MAP = None + def _ebcdic_to_ascii(self, s): + c = self.__class__ + if not c.EBCDIC_TO_ASCII_MAP: + emap = (0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15, + 16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31, + 128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7, + 144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26, + 32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33, + 38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94, + 45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63, + 186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34, + 195,97,98,99,100,101,102,103,104,105,196,197,198,199,200, + 201,202,106,107,108,109,110,111,112,113,114,203,204,205, + 206,207,208,209,126,115,116,117,118,119,120,121,122,210, + 211,212,213,214,215,216,217,218,219,220,221,222,223,224, + 225,226,227,228,229,230,231,123,65,66,67,68,69,70,71,72, + 73,232,233,234,235,236,237,125,74,75,76,77,78,79,80,81, + 82,238,239,240,241,242,243,92,159,83,84,85,86,87,88,89, + 90,244,245,246,247,248,249,48,49,50,51,52,53,54,55,56,57, + 250,251,252,253,254,255) + import string + c.EBCDIC_TO_ASCII_MAP = string.maketrans( \ + ''.join(map(chr, range(256))), ''.join(map(chr, emap))) + return s.translate(c.EBCDIC_TO_ASCII_MAP) + + MS_CHARS = { '\x80' : ('euro', '20AC'), + '\x81' : ' ', + '\x82' : ('sbquo', '201A'), + '\x83' : ('fnof', '192'), + '\x84' : ('bdquo', '201E'), + '\x85' : ('hellip', '2026'), + 
'\x86' : ('dagger', '2020'), + '\x87' : ('Dagger', '2021'), + '\x88' : ('circ', '2C6'), + '\x89' : ('permil', '2030'), + '\x8A' : ('Scaron', '160'), + '\x8B' : ('lsaquo', '2039'), + '\x8C' : ('OElig', '152'), + '\x8D' : '?', + '\x8E' : ('#x17D', '17D'), + '\x8F' : '?', + '\x90' : '?', + '\x91' : ('lsquo', '2018'), + '\x92' : ('rsquo', '2019'), + '\x93' : ('ldquo', '201C'), + '\x94' : ('rdquo', '201D'), + '\x95' : ('bull', '2022'), + '\x96' : ('ndash', '2013'), + '\x97' : ('mdash', '2014'), + '\x98' : ('tilde', '2DC'), + '\x99' : ('trade', '2122'), + '\x9a' : ('scaron', '161'), + '\x9b' : ('rsaquo', '203A'), + '\x9c' : ('oelig', '153'), + '\x9d' : '?', + '\x9e' : ('#x17E', '17E'), + '\x9f' : ('Yuml', ''),} + +####################################################################### + + +#By default, act as an HTML pretty-printer. +if __name__ == '__main__': + import sys + soup = BeautifulSoup(sys.stdin) + print soup.prettify()
#!/usr/bin/env python
"""LocationFile.py: read a delimited "location" file into a key -> values map.

All helpers in this module report problems by writing a message to stderr
and terminating the process with exit status 1 (see ``die``); I/O and
format errors are never propagated to the caller as exceptions.
"""

import sys


def die(message):
    """Write ``message`` plus a newline to stderr, then exit with status 1."""
    sys.stderr.write('{0}\n'.format(message))
    sys.exit(1)


def open_or_die(filename, mode='r', message=None):
    """Open ``filename`` and return the file object, or exit on failure.

    filename -- path handed to the builtin ``open``.
    mode     -- mode string handed to ``open`` (default ``'r'``).
    message  -- prefix for the error text; defaults to
                ``'Error opening <filename>'``.

    On IOError the prefix and the OS error string are printed through
    ``die``, which ends the process.
    """
    if message is None:
        message = 'Error opening {0}'.format(filename)
    try:
        return open(filename, mode)
    except IOError as err:
        die('{0}: {1}'.format(message, err.strerror))


class LocationFile(object):
    """Index of a delimited text file, keyed on one of its columns.

    The file is read once, at construction time.  Each data line is split
    on ``delimiter``; the field at ``key_column`` becomes the key and the
    remaining fields (in order) become its value list.
    """

    def __init__(self, filename, comment_chars=None, delimiter='\t', key_column=0):
        """Load ``filename`` into memory.

        filename      -- path of the location file.
        comment_chars -- iterable of line prefixes that mark a comment line;
                         defaults to ``'#'`` only.  It is converted with
                         ``tuple()``, so a plain string is treated as a
                         collection of single-character prefixes.
        delimiter     -- field separator (default: tab).
        key_column    -- 0-based index of the field used as the key.

        Exits the process (via ``die``) if the file cannot be opened, a data
        line has too few columns, or a key is repeated with different values.
        """
        self.filename = filename
        if comment_chars is None:
            # A genuine one-element tuple, so the attribute has the same type
            # whether or not the caller supplied comment_chars.
            self.comment_chars = ('#',)
        else:
            self.comment_chars = tuple(comment_chars)
        self.delimiter = delimiter
        self.key_column = key_column
        self._map = {}
        self._populate_map()

    def _populate_map(self):
        """Fill ``self._map`` from the file; exit via ``die`` on any error."""
        try:
            with open(self.filename) as fh:
                for line_number, line in enumerate(fh, 1):
                    line = line.rstrip('\r\n')
                    # Empty lines carry no data: without this guard they were
                    # stored under the key '' (key_column == 0) or aborted the
                    # whole load as "less than N columns" (key_column > 0).
                    if not line or line.startswith(self.comment_chars):
                        continue
                    elems = line.split(self.delimiter)
                    if len(elems) <= self.key_column:
                        die('Location file {0} line {1}: less than {2} columns'.format(self.filename, line_number, self.key_column + 1))
                    key = elems.pop(self.key_column)
                    if key not in self._map:
                        self._map[key] = elems
                    elif self._map[key] != elems:
                        # An exact repeat of an earlier line is tolerated;
                        # only a conflicting redefinition is fatal.
                        die('Location file {0} line {1}: duplicate key "{2}"'.format(self.filename, line_number, key))
        except IOError as err:
            die('Error opening location file {0}: {1}'.format(self.filename, err.strerror))

    def get_values(self, key):
        """Return the values stored for ``key``.

        When exactly one value column remains after removing the key, that
        single string is returned; otherwise the list of values is returned.
        An unknown key ends the process through ``die``.
        """
        if key not in self._map:
            die('key "{0}" not found in location file {1}'.format(key, self.filename))
        rval = self._map[key]
        if len(rval) == 1:
            return rval[0]
        return rval
# OrderedDict.py
# http://code.activestate.com/recipes/576693/
# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy.
# Passes Python2.7's test suite and incorporates all the latest updates.
#
# The import fallbacks below additionally let the module load on Python 3,
# where the "thread", "dummy_thread" and "_abcoll" modules no longer exist.

try:
    from thread import get_ident as _get_ident
except ImportError:
    try:
        from _thread import get_ident as _get_ident
    except ImportError:
        from dummy_thread import get_ident as _get_ident

try:
    from _abcoll import KeysView, ValuesView, ItemsView
except ImportError:
    try:
        from collections.abc import KeysView, ValuesView, ItemsView
    except ImportError:
        # No view classes available (Python < 2.6): the view*() methods at
        # the bottom of the class are then unusable, everything else works.
        pass


class OrderedDict(dict):
    'Dictionary that remembers insertion order'
    # An inherited dict maps keys to values.
    # The inherited dict provides __getitem__, __len__, __contains__, and get.
    # The remaining methods are order-aware.
    # Big-O running times for all methods are the same as for regular dictionaries.

    # The internal self.__map dictionary maps keys to links in a doubly linked list.
    # The circular doubly linked list starts and ends with a sentinel element.
    # The sentinel element never gets deleted (this simplifies the algorithm).
    # Each link is stored as a list of length three:  [PREV, NEXT, KEY].

    def __init__(self, *args, **kwds):
        '''Initialize an ordered dictionary.  Signature is the same as for
        regular dictionaries, but keyword arguments are not recommended
        because their insertion order is arbitrary.

        '''
        if len(args) > 1:
            raise TypeError('expected at most 1 arguments, got %d' % len(args))
        try:
            # Already initialised (e.g. __init__ called twice): keep the list.
            self.__root
        except AttributeError:
            self.__root = root = []                     # sentinel node
            root[:] = [root, root, None]
            self.__map = {}
        self.__update(*args, **kwds)

    def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
        'od.__setitem__(i, y) <==> od[i]=y'
        # Setting a new item creates a new link which goes at the end of the linked
        # list, and the inherited dictionary is updated with the new key/value pair.
        if key not in self:
            root = self.__root
            last = root[0]
            last[1] = root[0] = self.__map[key] = [last, root, key]
        dict_setitem(self, key, value)

    def __delitem__(self, key, dict_delitem=dict.__delitem__):
        'od.__delitem__(y) <==> del od[y]'
        # Deleting an existing item uses self.__map to find the link which is
        # then removed by updating the links in the predecessor and successor nodes.
        dict_delitem(self, key)
        link_prev, link_next, key = self.__map.pop(key)
        link_prev[1] = link_next
        link_next[0] = link_prev

    def __iter__(self):
        'od.__iter__() <==> iter(od)'
        root = self.__root
        curr = root[1]
        while curr is not root:
            yield curr[2]
            curr = curr[1]

    def __reversed__(self):
        'od.__reversed__() <==> reversed(od)'
        root = self.__root
        curr = root[0]
        while curr is not root:
            yield curr[2]
            curr = curr[0]

    def clear(self):
        'od.clear() -> None.  Remove all items from od.'
        try:
            # values() instead of itervalues(): works on every Python version.
            for node in self.__map.values():
                del node[:]
            root = self.__root
            root[:] = [root, root, None]
            self.__map.clear()
        except AttributeError:
            # The private attributes do not exist yet (clear() reached
            # before __init__ ran); there is no linked list to reset.
            pass
        dict.clear(self)

    def popitem(self, last=True):
        '''od.popitem() -> (k, v), return and remove a (key, value) pair.
        Pairs are returned in LIFO order if last is true or FIFO order if false.

        '''
        if not self:
            raise KeyError('dictionary is empty')
        root = self.__root
        if last:
            link = root[0]
            link_prev = link[0]
            link_prev[1] = root
            root[0] = link_prev
        else:
            link = root[1]
            link_next = link[1]
            root[1] = link_next
            link_next[0] = root
        key = link[2]
        del self.__map[key]
        value = dict.pop(self, key)
        return key, value

    # -- the following methods do not depend on the internal structure --

    def keys(self):
        'od.keys() -> list of keys in od'
        return list(self)

    def values(self):
        'od.values() -> list of values in od'
        return [self[key] for key in self]

    def items(self):
        'od.items() -> list of (key, value) pairs in od'
        return [(key, self[key]) for key in self]

    def iterkeys(self):
        'od.iterkeys() -> an iterator over the keys in od'
        return iter(self)

    def itervalues(self):
        'od.itervalues -> an iterator over the values in od'
        for k in self:
            yield self[k]

    def iteritems(self):
        'od.iteritems -> an iterator over the (key, value) items in od'
        for k in self:
            yield (k, self[k])

    def update(*args, **kwds):
        '''od.update(E, **F) -> None.  Update od from dict/iterable E and F.

        If E is a dict instance, does:           for k in E: od[k] = E[k]
        If E has a .keys() method, does:         for k in E.keys(): od[k] = E[k]
        Or if E is an iterable of items, does:   for k, v in E: od[k] = v
        In either case, this is followed by:     for k, v in F.items(): od[k] = v

        '''
        # "self" is taken from *args so that a keyword argument named
        # "self" can still be passed through **kwds.
        if len(args) > 2:
            raise TypeError('update() takes at most 2 positional '
                            'arguments (%d given)' % (len(args),))
        elif not args:
            raise TypeError('update() takes at least 1 argument (0 given)')
        self = args[0]
        # Make progressively weaker assumptions about "other"
        other = ()
        if len(args) == 2:
            other = args[1]
        if isinstance(other, dict):
            for key in other:
                self[key] = other[key]
        elif hasattr(other, 'keys'):
            for key in other.keys():
                self[key] = other[key]
        else:
            for key, value in other:
                self[key] = value
        for key, value in kwds.items():
            self[key] = value

    __update = update  # let subclasses override update without breaking __init__

    __marker = object()  # sentinel: distinguishes "no default given" from None

    def pop(self, key, default=__marker):
        '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value.
        If key is not found, d is returned if given, otherwise KeyError is raised.

        '''
        if key in self:
            result = self[key]
            del self[key]
            return result
        if default is self.__marker:
            raise KeyError(key)
        return default

    def setdefault(self, key, default=None):
        'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
        if key in self:
            return self[key]
        self[key] = default
        return default

    def __repr__(self, _repr_running={}):
        'od.__repr__() <==> repr(od)'
        # _repr_running is deliberately a shared mutable default: it records
        # the (object id, thread id) pairs currently being formatted so that
        # a self-referencing dictionary prints '...' instead of recursing.
        call_key = id(self), _get_ident()
        if call_key in _repr_running:
            return '...'
        _repr_running[call_key] = 1
        try:
            if not self:
                return '%s()' % (self.__class__.__name__,)
            return '%s(%r)' % (self.__class__.__name__, self.items())
        finally:
            del _repr_running[call_key]

    def __reduce__(self):
        'Return state information for pickling'
        items = [[k, self[k]] for k in self]
        inst_dict = vars(self).copy()
        # Drop the private linked-list attributes; they are rebuilt from items.
        for k in vars(OrderedDict()):
            inst_dict.pop(k, None)
        if inst_dict:
            return (self.__class__, (items,), inst_dict)
        return self.__class__, (items,)

    def copy(self):
        'od.copy() -> a shallow copy of od'
        return self.__class__(self)

    @classmethod
    def fromkeys(cls, iterable, value=None):
        '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
        and values equal to v (which defaults to None).

        '''
        d = cls()
        for key in iterable:
            d[key] = value
        return d

    def __eq__(self, other):
        '''od.__eq__(y) <==> od==y.  Comparison to another OD is order-sensitive
        while comparison to a regular mapping is order-insensitive.

        '''
        if isinstance(other, OrderedDict):
            return len(self)==len(other) and self.items() == other.items()
        return dict.__eq__(self, other)

    def __ne__(self, other):
        'od.__ne__(y) <==> od!=y'
        return not self == other

    # -- the following methods are only used in Python 2.7 --

    def viewkeys(self):
        "od.viewkeys() -> a set-like object providing a view on od's keys"
        return KeysView(self)

    def viewvalues(self):
        "od.viewvalues() -> an object providing a view on od's values"
        return ValuesView(self)

    def viewitems(self):
        "od.viewitems() -> a set-like object providing a view on od's items"
        return ItemsView(self)
#!/usr/bin/env python
"""Population.py: individuals (SNP-table columns) grouped into populations."""

# Prefer the standard-library class (Python 2.7+); fall back to the backport
# module shipped next to this file on older interpreters.
try:
    from collections import OrderedDict
except ImportError:
    from OrderedDict import OrderedDict


class Individual(object):
    """One individual: a column identifier, a name and an optional alias.

    Equality and hashing consider only the column and the real name; the
    alias is a display override and does not take part in comparisons.
    """

    __slots__ = ['_column', '_name', '_alias']

    def __init__(self, column, name, alias=None):
        self._column = column
        self._name = name
        self._alias = alias

    @property
    def column(self):
        """Column identifier this individual was created with."""
        return self._column

    @property
    def name(self):
        """Display name: the alias when one is set, otherwise the real name."""
        return self._name if self._alias is None else self._alias

    @property
    def alias(self):
        """Alias, or None when no alias has been assigned."""
        return self._alias

    @alias.setter
    def alias(self, alias):
        self._alias = alias

    @property
    def real_name(self):
        """Name given at construction, ignoring any alias."""
        return self._name

    def __eq__(self, other):
        # Defer to the other operand instead of raising AttributeError when
        # compared with something that is not an Individual.
        if not isinstance(other, Individual):
            return NotImplemented
        return self._column == other._column and self._name == other._name

    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        # Kept consistent with __eq__ (column and real name only).
        return hash((self._column, self._name))

    def __repr__(self):
        return 'Individual: column={0} name={1} alias={2}'.format(self._column, self._name, self._alias)


class Population(object):
    """Ordered collection of Individuals, at most one per column."""

    def __init__(self, name=None):
        self._columns = OrderedDict()   # column -> Individual, insertion order
        self._name = name

    @property
    def name(self):
        """Optional label of the population."""
        return self._name

    @name.setter
    def name(self, name):
        self._name = name

    def add_individual(self, individual, alias=None):
        """Register ``individual`` under its column.

        Adding an individual equal to the one already stored for that column
        is a silent no-op (the stored entry, including its alias, is kept).
        ``alias`` is accepted for interface compatibility but is not used.

        Raises ValueError when the column is already taken by a different
        individual.
        """
        existing = self._columns.get(individual.column)
        if existing is None:
            self._columns[individual.column] = individual
        elif existing != individual:
            # The original raised a bare string here, which is not a valid
            # exception and surfaced as an unrelated TypeError.
            raise ValueError('Duplicate column: {0}'.format(individual))

    def is_superset(self, other):
        """Return True when every individual of ``other`` is also in self."""
        for column, other_individual in other._columns.items():
            our_individual = self._columns.get(column)
            if our_individual is None or our_individual != other_individual:
                return False
        return True

    def is_disjoint(self, other):
        """Return True when self and ``other`` share no individual."""
        for column, our_individual in self._columns.items():
            other_individual = other._columns.get(column)
            if other_individual is not None and other_individual == our_individual:
                return False
        return True

    def column_list(self):
        """Return the columns, in insertion order, as a list."""
        return list(self._columns)

    def individual_with_column(self, column):
        """Return the Individual stored for ``column``, or None."""
        return self._columns.get(column)

    def tag_list(self, delimiter=':'):
        """Return ``'<column><delimiter><name>'`` strings in insertion order."""
        return ['{0}{1}{2}'.format(column, delimiter, individual.name)
                for column, individual in self._columns.items()]

    def to_string(self, delimiter=':', separator=' ', replace_names_with=None):
        """Join the column/name tags with ``separator``.

        When ``replace_names_with`` is not None it is used in place of every
        individual's name.
        """
        entries = []
        for column, individual in self._columns.items():
            value = individual.name
            if replace_names_with is not None:
                value = replace_names_with
            entries.append('{0}{1}{2}'.format(column, delimiter, value))
        return separator.join(entries)

    def __str__(self):
        return self.to_string()

    def from_population_file(self, filename):
        """Add the individuals listed in a tab-separated file.

        Each non-empty line holds ``column<TAB>name`` optionally followed by
        ``<TAB>alias``; an alias that is empty after stripping whitespace is
        ignored.  Empty lines are skipped.

        Raises ValueError for a line with any other number of fields, or
        (from ``add_individual``) for a conflicting duplicate column.
        """
        with open(filename) as fh:
            for line_number, line in enumerate(fh, 1):
                line = line.rstrip('\r\n')
                if not line:
                    continue
                fields = line.split('\t')
                if len(fields) not in (2, 3):
                    raise ValueError('{0} line {1}: expected 2 or 3 tab-separated fields, found {2}'.format(filename, line_number, len(fields)))
                individual = Individual(fields[0], fields[1])
                alias = fields[2].strip() if len(fields) == 3 else ''
                if alias:
                    individual.alias = alias
                self.add_individual(individual)

    def from_tag_list(self, tag_list):
        """Add individuals described by ``'<column>:<name>'`` strings.

        Only the first ':' separates column from name, so names that contain
        ':' themselves are preserved intact.
        """
        for tag in tag_list:
            column, name = tag.split(':', 1)
            self.add_individual(Individual(column, name))

    def individual_names(self):
        """Yield the display name of every individual in insertion order."""
        for individual in self._columns.values():
            yield individual.name
#!/usr/bin/env python
"""add_fst_column.py: Galaxy wrapper around the external ``Fst_column`` program.

Command line (all arguments positional):

    add_fst_column.py INPUT P1_INPUT P2_INPUT GENOTYPES MIN_READS MIN_QUAL \
        RETAIN DISCARD_FIXED BIASED OUTPUT COLUMN:NAME [COLUMN:NAME ...]

P1_INPUT and P2_INPUT are population files; the trailing COLUMN:NAME tags
describe every individual of the SNP table.  GENOTYPES, MIN_READS, MIN_QUAL,
RETAIN, DISCARD_FIXED and BIASED are handed to ``Fst_column`` unchanged,
followed by one ``<column>:1`` argument per population-1 individual and one
``<column>:2`` argument per population-2 individual.  The program's standard
output is written to OUTPUT and its exit status becomes this script's.
"""

import sys
import subprocess
from Population import Population

################################################################################

USAGE = ('Usage: add_fst_column.py input p1_input p2_input genotypes min_reads '
         'min_qual retain discard_fixed biased output column:name [column:name ...]')

if len(sys.argv) < 12:
    sys.stderr.write(USAGE + '\n')
    sys.exit(1)

(input_path, p1_input, p2_input, genotypes, min_reads, min_qual,
 retain, discard_fixed, biased, output) = sys.argv[1:11]
individual_metadata = sys.argv[11:]

p_total = Population()
p_total.from_tag_list(individual_metadata)


def _load_population(path, label):
    """Read a population file; exit(1) unless it is a subset of the SNP table."""
    population = Population()
    population.from_population_file(path)
    if not p_total.is_superset(population):
        sys.stderr.write('There is an individual in population {0} that is not in the SNP table\n'.format(label))
        sys.exit(1)
    return population


p1 = _load_population(p1_input, 1)
p2 = _load_population(p2_input, 2)

################################################################################

prog = 'Fst_column'

args = [prog, input_path, genotypes, min_reads, min_qual, retain, discard_fixed, biased]
args.extend('{0}:1'.format(column) for column in p1.column_list())
args.extend('{0}:2'.format(column) for column in p2.column_list())

#print "args:", ' '.join(args)
# "with" closes the output file even when the program cannot be started.
with open(output, 'w') as fh:
    rc = subprocess.call(args, stdout=fh, stderr=sys.stderr)

# Report the child's status instead of unconditionally claiming success.
sys.exit(rc)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/add_fst_column.xml Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,90 @@ +<tool id="gd_add_fst_column" name="Add an FST column" version="1.0.0"> + <description>to a table</description> + + <command interpreter="python"> + add_fst_column.py "$input" "$p1_input" "$p2_input" "$data_source" "$min_reads" "$min_qual" "$retain" "$discard_fixed" "$biased" "$output" + #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns) + #set $arg = '%s:%s' % ($individual_col, $individual) + "$arg" + #end for + </command> + + <inputs> + <param name="input" type="data" format="wsf" label="SNP table" /> + <param name="p1_input" type="data" format="ind" label="Population 1 individuals" /> + <param name="p2_input" type="data" format="ind" label="Population 2 individuals" /> + + <param name="data_source" type="select" format="integer" label="Data source"> + <option value="0" selected="true">sequence coverage</option> + <option value="1">estimated genotype</option> + </param> + + <param name="min_reads" type="integer" min="0" value="0" label="Minimum total read count for a population" /> + <param name="min_qual" type="integer" min="0" value="0" label="Minimum individual genotype quality" /> + + <param name="retain" type="select" label="Special treatment"> + <option value="0" selected="true">Skip row</option> + <option value="1">Set FST = -1</option> + </param> + + <param name="discard_fixed" type="select" label="Apparently fixed SNPs"> + <option value="0">Retain SNPs that appear fixed in the two populations</option> + <option value="1" selected="true">Delete SNPs that appear fixed in the two populations</option> + </param> + + <param name="biased" type="select" label="FST estimator"> + <option value="0" selected="true">Wright's original definition</option> + <option value="1">Weir's unbiased estimator</option> + </param> + + </inputs> + + <outputs> + <data name="output" 
format="wsf" metadata_source="input" /> + </outputs> + + <tests> + <test> + <param name="input" value="genome_diversity/test_in/sample.wsf" ftype="wsf" /> + <param name="p1_input" value="genome_diversity/test_in/a.ind" ftype="ind" /> + <param name="p2_input" value="genome_diversity/test_in/b.ind" ftype="ind" /> + <param name="data_source" value="0" /> + <param name="min_reads" value="3" /> + <param name="min_qual" value="0" /> + <param name="retain" value="0" /> + <param name="discard_fixed" value="1" /> + <param name="biased" value="0" /> + <output name="output" file="genome_diversity/test_out/add_fst_column/add_fst_column.wsf" /> + </test> + </tests> + + <help> +**What it does** + +The user specifies a SNP table and two "populations" of individuals, +both previously defined using the Galaxy tool to select individuals from +a SNP table. No individual can be in both populations. Other choices are +as follows. + +Data source. The allele frequencies of a SNP in the two populations can be +estimated either by the total number of reads of each allele, or by adding +the frequencies inferred from genotypes of individuals in the populations. + +After specifying the data source, the user sets lower bounds on amount +of data required at a SNP. For estimating the Fst using read counts, +the bound is the minimum count of reads of the two alleles in a population. +For estimations based on genotype, the bound is the minimum reported genotype +quality per individual. + +The user specifies whether the SNPs that violate the lower bound should be +ignored or the Fst set to -1. + +The user specifies whether SNPs where both populations appear to be fixed +for the same allele should be retained or discarded. + +Finally, the user chooses which definition of Fst to use: Wright's original +definition or Weir's unbiased estimator. + +A column is appended to the SNP table giving the Fst for each retained SNP. + </help> +</tool>
#!/usr/bin/env python
"""average_fst.py: Galaxy wrapper around the external ``Fst_ave`` program.

Command line (all arguments positional):

    average_fst.py INPUT P1_INPUT P2_INPUT DATA_SOURCE MIN_TOTAL_COUNT \
        DISCARD_FIXED BIASED OUTPUT SHUFFLES P0_INPUT COLUMN:NAME [COLUMN:NAME ...]

P1_INPUT and P2_INPUT are population files; P0_INPUT is only read when
SHUFFLES parses as an integer greater than zero.  The trailing COLUMN:NAME
tags describe every individual of the SNP table.  DATA_SOURCE,
MIN_TOTAL_COUNT, DISCARD_FIXED and BIASED are handed to ``Fst_ave``
unchanged, followed by the shuffle count and one ``<column>:<n>`` argument
per individual (n = 1, 2 or 0 for the respective population).  The program's
standard output is written to OUTPUT and its exit status becomes this
script's.
"""

import sys
import subprocess
from Population import Population

################################################################################

USAGE = ('Usage: average_fst.py input p1_input p2_input data_source min_total_count '
         'discard_fixed biased output shuffles p0_input column:name [column:name ...]')

if len(sys.argv) < 12:
    sys.stderr.write(USAGE + '\n')
    sys.exit(1)

(input_path, p1_input, p2_input, data_source, min_total_count,
 discard_fixed, biased, output, shuffles, p0_input) = sys.argv[1:11]
individual_metadata = sys.argv[11:]

# A shuffle count that is not an integer means "no randomization".
try:
    shuffle_count = int(shuffles)
except ValueError:
    shuffle_count = 0

p_total = Population()
p_total.from_tag_list(individual_metadata)


def _load_population(path, label):
    """Read a population file; exit(1) unless it is a subset of the SNP table."""
    population = Population()
    population.from_population_file(path)
    if not p_total.is_superset(population):
        sys.stderr.write('There is an individual in population {0} that is not in the SNP table\n'.format(label))
        sys.exit(1)
    return population


p1 = _load_population(p1_input, 1)
p2 = _load_population(p2_input, 2)

p0 = None
if shuffle_count > 0:
    p0 = _load_population(p0_input, 0)

################################################################################

prog = 'Fst_ave'

# Forward the parsed shuffle count rather than the raw string, so the program
# is never asked for shuffles this wrapper decided not to provide a
# population-0 column list for.
args = [prog, input_path, data_source, min_total_count, discard_fixed, biased, str(shuffle_count)]
args.extend('{0}:1'.format(column) for column in p1.column_list())
args.extend('{0}:2'.format(column) for column in p2.column_list())
if p0 is not None:
    args.extend('{0}:0'.format(column) for column in p0.column_list())

#print "args:", ' '.join(args)
# "with" closes the output file even when the program cannot be started.
with open(output, 'w') as fh:
    rc = subprocess.call(args, stdout=fh, stderr=sys.stderr)

# Report the child's status instead of unconditionally claiming success.
sys.exit(rc)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/average_fst.xml Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,112 @@ +<tool id="gd_average_fst" name="Average FST" version="1.0.0"> + <description>of two populations</description> + + <command interpreter="python"> + average_fst.py "$input" "$p1_input" "$p2_input" "$data_source.ds_choice" "$data_source.min_value" "$discard_fixed" "$biased" "$output" + #if $use_randomization.ur_choice == '1' + "$use_randomization.shuffles" "$use_randomization.p0_input" + #else + "0" "/dev/null" + #end if + #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns) + #set $arg = '%s:%s' % ($individual_col, $individual) + "$arg" + #end for + </command> + + <inputs> + <param name="input" type="data" format="wsf" label="SNP table" /> + <param name="p1_input" type="data" format="ind" label="Population 1 individuals" /> + <param name="p2_input" type="data" format="ind" label="Population 2 individuals" /> + + <conditional name="data_source"> + <param name="ds_choice" type="select" format="integer" label="Data source"> + <option value="0" selected="true">sequence coverage and ..</option> + <option value="1">estimated genotype and ..</option> + </param> + <when value="0"> + <param name="min_value" type="integer" min="1" value="1" label="Minimum total read count for a population" /> + </when> + <when value="1"> + <param name="min_value" type="integer" min="1" value="1" label="Minimum individual genotype quality" /> + </when> + </conditional> + + <param name="discard_fixed" type="select" label="Apparently fixed SNPs"> + <option value="0">Retain SNPs that appear fixed in the two populations</option> + <option value="1" selected="true">Delete SNPs that appear fixed in the two populations</option> + </param> + + <param name="biased" type="select" label="FST estimator"> + <option value="0" selected="true">Wright's original definition</option> + <option value="1">Weir's unbiased 
estimator</option> + </param> + + <conditional name="use_randomization"> + <param name="ur_choice" type="select" format="integer" label="Use randomization"> + <option value="0" selected="true">No</option> + <option value="1">Yes</option> + </param> + <when value="0" /> + <when value="1"> + <param name="shuffles" type="integer" min="0" value="0" label="Shuffles" /> + <param name="p0_input" type="data" format="ind" label="Individuals for randomization" /> + </when> + </conditional> + </inputs> + + <outputs> + <data name="output" format="txt" /> + </outputs> + + <tests> + <test> + <param name="input" value="genome_diversity/test_in/sample.wsf" ftype="wsf" /> + <param name="p1_input" value="genome_diversity/test_in/a.ind" ftype="ind" /> + <param name="p2_input" value="genome_diversity/test_in/b.ind" ftype="ind" /> + <param name="ds_choice" value="0" /> + <param name="min_value" value="3" /> + <param name="discard_fixed" value="1" /> + <param name="biased" value="0" /> + <param name="ur_choice" value="0" /> + <output name="output" file="genome_diversity/test_out/average_fst/average_fst.txt" /> + </test> + </tests> + + <help> +**What it does** + +The user specifies a SNP table and two "populations" of individuals, +both previously defined using the Galaxy tool to select individuals from +a SNP table. No individual can be in both populations. Other choices are +as follows. + +Data source. The allele frequencies of a SNP in the two populations can be +estimated either by the total number of reads of each allele, or by adding +the frequencies inferred from genotypes of individuals in the populations. + +After specifying the data source, the user sets lower bounds on amount +of data required at a SNP. For estimating the Fst using read counts, +the bound is the minimum count of reads of the two alleles in a population. +For estimations based on genotype, the bound is the minimum reported genotype +quality per individual. SNPs not meeting these lower bounds are ignored. 
+ +The user specifies whether SNPs where both populations appear to be fixed +for the same allele should be retained or discarded. + +The user chooses which definition of Fst to use: Wright's original definition +or Weir's unbiased estimator. + +Finally, the user decides whether to use randomizations. If so, then the +user specifies how many randomly generated population pairs (retaining +the numbers of individuals of the originals) to generate, as well as the +"population" of additional individuals (not in the first two populations) +that can be used in the randomization process. + +The program prints the average Fst for the original populations and the +number of SNPs used to compute it. If randomizations were requested, +it prints the average Fst for each randomly generated population pair, +ending with a summary that includes the maximum and average value, and the +highest-scoring population pair. + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/calclenchange.py Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,280 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# calclenchange.py +# +# Copyright 2011 Oscar Bedoya-Reina <oscar@niska.bx.psu.edu> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301, USA. + +import argparse,mechanize,os,sys +from decimal import Decimal,getcontext +from xml.etree.ElementTree import ElementTree,tostring +import networkx as nx +from copy import copy + +#method to rank the the pthways by mut. freq. +def rankdN(ltfreqs): + ordvals=sorted(ltfreqs)#sort and reverse freqs. + #~ + outrnk=[] + tmpChng0,tmpOri,tmpMut,tmpPthw=ordvals.pop()#the highest possible value + if tmpOri=='C': + if tmpMut!='C': + tmpChng0='C-%s'%tmpMut + else: + tmpChng0=Decimal('0') + crank=1 + outrnk.append([str(tmpChng0),str(tmpOri),str(tmpMut),str(crank),tmpPthw]) + totalnvals=len(ordvals) + cnt=0 + while totalnvals>cnt: + cnt+=1 + tmpChng,tmpOri,tmpMut,tmpPthw=ordvals.pop() + if tmpOri=='C': + if tmpMut!='C': + tmpChng='C-%s'%tmpMut + else: + tmpChng=Decimal('0') + if tmpChng!=tmpChng0: + crank=len(outrnk)+1 + tmpChng0=tmpChng + outrnk.append([str(tmpChng),str(tmpOri),str(tmpMut),str(crank),tmpPthw]) + return outrnk + +#method to rank the the pthways by mut. freq. 
def rankdAvr(ltfreqs):
    """Rank pathways by the change in average source-to-sink path length.

    ltfreqs is an iterable of [change, original, mutated, pathway] records.
    Records are visited from the largest to the smallest (natural ordering
    of the records).  Records sharing the same change share a rank; a new
    change value gets the rank "number of pathways already ranked + 1".

    When the original value is the marker 'I', the change is reported as
    'I-<mutated>' (or Decimal('0') when the mutated value is 'I' as well).

    Returns a dict mapping pathway -> 'change<TAB>original<TAB>mutated<TAB>rank'.
    An empty input yields an empty dict.
    """
    outrnk = {}
    prev_change = object()  # sentinel: never equal to a real change value
    crank = 0
    # reversed(sorted(...)) visits the records in the same order as popping
    # the ascending sort from its tail.
    for change, ori, mut, pthw in reversed(sorted(ltfreqs)):
        if ori == 'I':
            if mut != 'I':
                change = 'I-%s' % mut
            else:
                change = Decimal('0')
        if change != prev_change:
            crank = len(outrnk) + 1
            prev_change = change
        outrnk[pthw] = '\t'.join([str(change), str(ori), str(mut), str(crank)])
    return outrnk


def returnstartanendnodes(edges):
    """Return (start nodes, end nodes) for a collection of directed edges.

    edges is an iterable of (beginNod, endNod) pairs.  Start nodes are the
    nodes that never appear as the end of an edge (in-degree 0); end nodes
    are the nodes that never appear as the beginning of an edge
    (out-degree 0).  Both are returned as sets.
    """
    listID0st = set()  # every node that begins some edge
    listOD0en = set()  # every node that ends some edge
    for beginNod, endNod in edges:  # O(n)
        listID0st.add(beginNod)
        listOD0en.add(endNod)
    return listID0st - listOD0en, listOD0en - listID0st


def _link_compounds(edges, nodes, reaction_id, reaction_type, compounds, as_product):
    """Add the compound nodes of one reaction and the edges linking them.

    Substrates point to the reaction and the reaction points to products.
    A 'reversible' reaction gets both directions; any reaction type other
    than 'irreversible' or 'reversible' contributes nodes but no edges.
    """
    for compound in compounds:
        compound_id = compound.get('id')
        nodes.add(compound_id)
        if as_product:
            forward = (reaction_id, compound_id)
        else:
            forward = (compound_id, reaction_id)
        if reaction_type == 'irreversible':
            edges.add(forward)
        elif reaction_type == 'reversible':
            edges.add(forward)
            edges.add((forward[1], forward[0]))


def returnNodesNEdgesfKXML(fpthwKGXML):
    """Parse a KEGG pathway XML document into nodes, edges and its title.

    fpthwKGXML is whatever ElementTree.parse() accepts (a file name or an
    open file object).

    Returns (dnodes, edges, title):
      * dnodes maps the id of every 'entry' element that takes part in a
        reaction to the set of whitespace-separated names of that entry;
      * edges is a set of (from_id, to_id) pairs built from the 'reaction'
        elements and their 'substrate' / 'product' children;
      * title is the 'title' attribute of the root element.
    """
    tree = ElementTree()
    ptree = tree.parse(fpthwKGXML)
    title = ptree.get('title')

    edges, ndstmp = set(), set()
    for reactn in ptree.findall('reaction'):
        mainid = reactn.get('id')
        ndstmp.add(mainid)
        reacttyp = reactn.get('type')
        _link_compounds(edges, ndstmp, mainid, reacttyp,
                        reactn.findall('substrate'), False)
        _link_compounds(edges, ndstmp, mainid, reacttyp,
                        reactn.findall('product'), True)

    # Walk the entries back to front so that, should two entries share an
    # id, the one appearing first in the document is the one that is kept.
    dnodes = {}
    for prot in reversed(ptree.findall('entry')):
        prot_id = prot.get('id')
        if prot_id in ndstmp:
            dnodes[prot_id] = set(prot.get('name').split())  # each gene name for each id
    return dnodes, edges, title


def rtrnAvrgLen(edges, strNds, endNds):
    """Count the shortest paths from start to end nodes and average their length.

    edges  -- iterable of (from, to) pairs used to build a directed graph
    strNds -- collection of start nodes (no longer emptied by this call)
    endNds -- collection of end nodes

    Returns (nPaths, AvrgPthLen): the number of reachable (start, end) pairs
    and the mean shortest-path length between them.  AvrgPthLen is the int 0
    when no pair is connected, a Decimal otherwise.
    """
    wG = nx.DiGraph()  # reference graph
    wG.add_edges_from(edges)
    # dict() accepts both the dict returned by older networkx releases and
    # the (node, lengths) iterator returned by newer ones.
    dPairsSrcSnks = dict(nx.all_pairs_shortest_path_length(wG))
    lPathLen = []
    for cStartNd in list(strNds):
        dEndNdsLen = dPairsSrcSnks[cStartNd]
        for cendNd in dEndNdsLen:
            if cendNd in endNds:
                lPathLen.append(dEndNdsLen[cendNd])
    nPaths = len(lPathLen)
    AvrgPthLen = 0
    if nPaths != 0:
        AvrgPthLen = Decimal(sum(lPathLen)) / Decimal(str(nPaths))
    return nPaths, AvrgPthLen


def _find_species_entry(loc_file, species):
    """Return the whitespace-split fields of the first loc_file line for species.

    Blank lines are ignored.  Exits with an error message when the species
    is not listed.
    """
    with open(loc_file) as loc_fh:
        for line in loc_fh:
            fields = line.split()
            if fields and fields[0] == species:
                return fields
    sys.exit('species %s not found in %s' % (species, loc_file))


def _load_gene_pathways(inputf, gene_col, pthw_col):
    """Map every gene code of the input table to its set of pathway codes.

    The first line of the table is skipped and blank lines are ignored.
    The pathway column holds '.'-separated items; only the text before '='
    of each item is kept.  A gene cell holding several '.'-separated genes
    is expanded into one key per gene, all sharing the same pathway set.
    """
    dKEGGcPthws = {}
    with open(inputf) as input_fh:
        for line in input_fh.read().splitlines()[1:]:
            if not line.strip():
                continue
            fields = line.split('\t')
            dKEGGcPthws[fields[gene_col]] = set(
                item.split('=')[0] for item in fields[pthw_col].split('.'))
    # correct names with more than one gene
    for mgenes in [gene for gene in dKEGGcPthws if '.' in gene]:
        pthwsAssotd = dKEGGcPthws.pop(mgenes)
        for eachg in mgenes.split('.'):
            dKEGGcPthws[eachg] = pthwsAssotd
    return dKEGGcPthws


def main():
    """Rank pathways by how removing the listed genes changes their paths.

    For every pathway XML file of the selected species the number of
    source-to-sink paths and their average length are computed twice: on
    the full pathway graph and on the graph without the nodes whose genes
    are all present in the input list.  Both comparisons are ranked and
    written, tab separated, to the output file.
    """
    parser = argparse.ArgumentParser(description='Rank pathways based on the change in length and number of paths connecting sources and sinks.')
    parser.add_argument('--loc_file', metavar='correlational database', type=str, help='correlational database')
    parser.add_argument('--species', metavar='species name', type=str, help='the species of interest in loc_file')
    parser.add_argument('--output', metavar='output TXT file', type=str, help=(
        'the output file with the table in txt format. '
        'Column 1 is the diference between column 2 and column 3, '
        'Column 2 is the pathway average length (between sources and sinks) including the genes in the input list, '
        'Column 3 is the pathway average length EXCLUDING the genes in the input list, '
        'Column 4 is the rank based on column 1. '
        'Column 5 is the diference between column 6 and column 7, '
        'Column 6 is the number of paths between sources and sinks, including the genes in the input list, '
        'Column 7 is the number of paths between sources and sinks EXCLUDING the genes in the input list, '
        'Column 8 is the rank based on column 5. '
        'Column 9 I the pathway name'))
    parser.add_argument('--posKEGGclmn', metavar='column number', type=int, help='the column with the KEGG pathway code/name')
    parser.add_argument('--KEGGgeneposcolmn', metavar='column number', type=int, help='column with the KEGG gene code')
    parser.add_argument('--input', metavar='input TXT file', type=str, help='the input file with the table in txt format')
    fulargs = parser.parse_args(sys.argv[1:])

    inputf, loc_file, species, output = fulargs.input, fulargs.loc_file, fulargs.species, fulargs.output
    # column numbers are given 1-based on the command line
    posKEGGclmn = fulargs.posKEGGclmn - 1
    Kgeneposcolmn = fulargs.KEGGgeneposcolmn - 1

    # Get the extra variables: field 1 is the gene prefix of the species,
    # field 2 the directory holding its pathway XML files.
    crDB = _find_species_entry(loc_file, species)
    sppPrefx, dinput = crDB[1], crDB[2]
    # limit Decimal arithmetic to 3 significant digits
    getcontext().prec = 3
    # make a dictionary of valid genes
    dKEGGcPthws = _load_gene_pathways(inputf, Kgeneposcolmn, posKEGGclmn)

    # NOTE(review): 'cfa04070.xml' is excluded by name; the reason is not
    # visible in this file.
    lPthwsF = [x for x in os.listdir(dinput) if x.find('.xml') > -1 if x not in ['cfa04070.xml']]
    lPthwPthN = []  # the output list for number of paths
    lPthwPthAvr = []  # the output list for the length of paths

    for KEGGpathw in reversed(lPthwsF):
        comdKEGGpathw = KEGGpathw.split('.')[0]
        # genes of the input list annotated to this pathway, as 'prefix:gene'
        tmpddGenrcgenPresent = set(
            ':'.join([sppPrefx, eachK])
            for eachK, alPthws in dKEGGcPthws.items()
            if comdKEGGpathw in alPthws)

        # Make graph calculations
        with open(os.path.join(dinput, KEGGpathw)) as kxml_fh:
            dnodes, edges, title = returnNodesNEdgesfKXML(kxml_fh)
        startNdsID0, endNdsOD0 = returnstartanendnodes(edges)
        startNdsOri = copy(startNdsID0)

        nPaths = 'C'  # stands for circuit
        AvrgPthLen = 'I'  # stand for infinite
        if len(startNdsID0) > 0 and len(endNdsOD0) > 0:
            nPaths, AvrgPthLen = rtrnAvrgLen(edges, startNdsID0, endNdsOD0)

        # work with the genes in the list: a node is deleted when every one
        # of its genes is present in the input list
        genestodel = set()
        for cNod, sgenes in dnodes.items():
            if len(sgenes.intersection(tmpddGenrcgenPresent)) == len(sgenes):
                genestodel.add(cNod)

        # del nodes from graph edges
        wnPaths, wAvrgPthLen = nPaths, AvrgPthLen
        if len(genestodel) > 0:
            wedges = set([x for x in edges if len(set(x).intersection(genestodel)) == 0])
            wstartNds, wendNds = returnstartanendnodes(wedges)
            if nPaths != 'C':
                # only keep the sources and sinks of the original graph
                wstartNds = [x for x in wstartNds if x in startNdsOri]
                wendNds = [x for x in wendNds if x in endNdsOD0]
            if len(wstartNds) > 0 and len(wendNds) > 0:
                wnPaths, wAvrgPthLen = rtrnAvrgLen(wedges, wstartNds, wendNds)

        # Calculate the differences; a circuit counts as 1000 on both measures
        orNP, mutNP, oriLen, mutLen = nPaths, wnPaths, AvrgPthLen, wAvrgPthLen
        if nPaths == 'C':
            orNP = Decimal('1000')
            oriLen = Decimal('1000')
        if wnPaths == 'C':
            mutNP = Decimal('1000')
            mutLen = Decimal('1000')
        pthw_label = '='.join([comdKEGGpathw, title])
        lPthwPthN.append([orNP - mutNP, nPaths, wnPaths, pthw_label])
        lPthwPthAvr.append([oriLen - mutLen, AvrgPthLen, wAvrgPthLen, pthw_label])

    # rankdN is defined earlier in this file; each of its rows is indexed
    # here with the pathway label at position 4.
    doutrnkPthN = rankdN(lPthwPthN)
    doutrnkPthAvr = rankdAvr(lPthwPthAvr)

    sall = ['\t'.join([doutrnkPthAvr[x[4]], '\t'.join(x)]) for x in doutrnkPthN]
    with open(output, 'w') as salef:
        salef.write('\n'.join(sall))
    return 0


if __name__ == '__main__':
    main()
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/calctfreq.py Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,114 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#       calcfreq.py
#
#       Copyright 2011 Oscar Bedoya-Reina <oscar@niska.bx.psu.edu>
#
#       This program is free software; you can redistribute it and/or modify
#       it under the terms of the GNU General Public License as published by
#       the Free Software Foundation; either version 2 of the License, or
#       (at your option) any later version.
#
#       This program is distributed in the hope that it will be useful,
#       but WITHOUT ANY WARRANTY; without even the implied warranty of
#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#       GNU General Public License for more details.
#
#       You should have received a copy of the GNU General Public License
#       along with this program; if not, write to the Free Software
#       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#       MA 02110-1301, USA.

import argparse,os,sys
from decimal import Decimal,getcontext
from LocationFile import LocationFile


def rankd(ltfreqs):
    """Rank the pathways by mutation frequency.

    ltfreqs is an iterable of (frequency, count, pathway) records.  Records
    are visited from the largest to the smallest; records sharing a
    frequency share a rank, and a new frequency gets the rank "number of
    rows already emitted + 1".

    Returns a list of 'count<TAB>frequency<TAB>rank<TAB>pathway' strings,
    highest frequency first.  An empty input yields an empty list.
    """
    outrnk = []
    prev_freq = object()  # sentinel: never equal to a real frequency
    crank = 0
    # reversed(sorted(...)) visits the records in the same order as popping
    # the ascending sort from its tail.
    for tmpFreq, tmpCount, tmpPthw in reversed(sorted(ltfreqs)):
        if tmpFreq != prev_freq:
            crank = len(outrnk) + 1
            prev_freq = tmpFreq
        outrnk.append('\t'.join([str(tmpCount), str(tmpFreq), str(crank), tmpPthw]))
    return outrnk


def main():
    """Count, per pathway, the genes of the input table and rank the pathways.

    The per-pathway totals are read from the dictionary file that the
    location file lists for the selected species; the fraction
    count / total is computed for every pathway of that file and the ranked
    table is written to the output file.
    """
    # NOTE(review): the description below looks copied from another tool;
    # it is left untouched because it is user-facing text.
    parser = argparse.ArgumentParser(description='Obtain KEGG images from a list of genes.')
    parser.add_argument('--input',metavar='input TXT file',type=str,help='the input file with the table in txt format')
    parser.add_argument('--output',metavar='output TXT file',type=str,help='the output file with the table in txt format. Column 1 is the count of genes in the list, Column 2 is the percentage of the pathway genes present on the list. Column 3 is the rank based on column 2')
    parser.add_argument('--posKEGGclmn',metavar='column number',type=int,help='the column with the KEGG pathway code/name')
    parser.add_argument('--KEGGgeneposcolmn',metavar='column number',type=int,help='column with the KEGG gene code')
    parser.add_argument('--loc_file',metavar='location file',type=str,help='location file')
    parser.add_argument('--species',metavar='species',type=str,help='species')
    fulargs = parser.parse_args(sys.argv[1:])

    inputf, outputf = fulargs.input, fulargs.output
    locf, species = fulargs.loc_file, fulargs.species
    # column numbers are given 1-based on the command line
    posKEGGclmn = fulargs.posKEGGclmn - 1
    Kgeneposcolmn = fulargs.KEGGgeneposcolmn - 1

    # make a dictionary of valid genes: gene code -> set of the
    # '.'-separated items of the pathway column (first line is skipped)
    dKEGGcPthws = {}
    with open(inputf) as input_fh:
        for line in input_fh.read().splitlines()[1:]:
            if not line.strip():
                continue
            fields = line.split('\t')
            dKEGGcPthws[fields[Kgeneposcolmn]] = set(fields[posKEGGclmn].split('.'))
    # correct names with more than one gene: one key per gene
    for mgenes in [gene for gene in dKEGGcPthws if '.' in gene]:
        pthwsAssotd = dKEGGcPthws.pop(mgenes)
        for eachg in mgenes.split('.'):
            dKEGGcPthws[eachg] = pthwsAssotd

    # Count genes
    getcontext().prec = 2  # Decimal results are rounded to 2 significant digits

    location_file = LocationFile(locf)
    prefix, kxml_dir_path, dict_file = location_file.get_values(species)
    dPthContsTotls = {}
    try:
        with open(dict_file) as fh:
            for line in fh:
                line = line.rstrip('\r\n')
                value, key = line.split('\t')
                dPthContsTotls[key] = int(value)
    except IOError as err:
        print >> sys.stderr, 'Error opening dict file {0}: {1}'.format(dict_file, err.strerror)
        sys.exit(1)

    dPthContsTmp = dict([(x, 0) for x in dPthContsTotls])  # per-pathway gene counter
    for sKEGGcPthws in dKEGGcPthws.values():
        for eachP in sKEGGcPthws:
            if eachP != 'N':  # 'N' entries are not counted
                dPthContsTmp[eachP] += 1

    # Calculate Freqs.
    ltfreqs = [((Decimal(dPthContsTmp[x]) / Decimal(dPthContsTotls[x])), Decimal(dPthContsTmp[x]), x) for x in dPthContsTotls]
    tabllfreqs = '\n'.join(rankd(ltfreqs))
    with open(outputf, 'w') as salef:
        salef.write(tabllfreqs)
    return 0


if __name__ == '__main__':
    main()
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cdblib.py Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,230 @@
#!/usr/bin/env python2.5

'''
Manipulate DJB's Constant Databases. These are 2 level disk-based hash tables
that efficiently handle many keys, while remaining space-efficient.

    http://cr.yp.to/cdb.html

When generated databases are only used with Python code, consider using hash()
rather than djb_hash() for a tidy speedup.
'''

from _struct import Struct
from itertools import chain


def py_djb_hash(s):
    '''Return the value of DJB's hash function for the given 8-bit string.'''
    h = 5381
    for c in s:
        # h * 33 XOR the character code, truncated to 32 bits.
        h = (((h << 5) + h) ^ ord(c)) & 0xffffffff
    return h

# Use the djb_hash of the _cdblib module when it can be imported, the pure
# Python implementation above otherwise.
try:
    from _cdblib import djb_hash
except ImportError:
    djb_hash = py_djb_hash

# (Un)packers for a pair of little-endian unsigned 32-bit integers.
read_2_le4 = Struct('<LL').unpack
write_2_le4 = Struct('<LL').pack


class Reader(object):
    '''A dictionary-like object for reading a Constant Database accessed
    through a string or string-like sequence, such as mmap.mmap().'''

    def __init__(self, data, hashfn=djb_hash):
        '''Create an instance reading from a sequence and using hashfn to hash
        keys.'''
        # The first 2048 bytes hold the 256-entry index read below.
        if len(data) < 2048:
            raise IOError('CDB too small')

        self.data = data
        self.hashfn = hashfn

        # 256 (table position, number of slots) pairs, 8 bytes each.
        self.index = [read_2_le4(data[i:i+8]) for i in xrange(0, 2048, 8)]
        # Records end where the first hash table begins.
        self.table_start = min(p[0] for p in self.index)
        # Assume load factor is 0.5 like official CDB.
        self.length = sum(p[1] >> 1 for p in self.index)

    def iteritems(self):
        '''Like dict.iteritems(). Items are returned in insertion order.'''
        # Records start right after the index; each one is an 8-byte
        # (key length, data length) header followed by the key and the data.
        pos = 2048
        while pos < self.table_start:
            klen, dlen = read_2_le4(self.data[pos:pos+8])
            pos += 8

            key = self.data[pos:pos+klen]
            pos += klen

            data = self.data[pos:pos+dlen]
            pos += dlen

            yield key, data

    def items(self):
        '''Like dict.items().'''
        return list(self.iteritems())

    def iterkeys(self):
        '''Like dict.iterkeys().'''
        return (p[0] for p in self.iteritems())
    __iter__ = iterkeys

    def itervalues(self):
        '''Like dict.itervalues().'''
        return (p[1] for p in self.iteritems())

    def keys(self):
        '''Like dict.keys().'''
        return [p[0] for p in self.iteritems()]

    def values(self):
        '''Like dict.values().'''
        return [p[1] for p in self.iteritems()]

    def __getitem__(self, key):
        '''Like dict.__getitem__().'''
        value = self.get(key)
        if value is None:
            raise KeyError(key)
        return value

    def has_key(self, key):
        '''Return True if key exists in the database.'''
        return self.get(key) is not None
    __contains__ = has_key

    def __len__(self):
        '''Return the number of records in the database.'''
        return self.length

    def gets(self, key):
        '''Yield values for key in insertion order.'''
        # Truncate to 32 bits and remove sign.
        h = self.hashfn(key) & 0xffffffff
        # The low 8 bits of the hash select one of the 256 tables.
        start, nslots = self.index[h & 0xff]

        if nslots:
            end = start + (nslots << 3)
            # The remaining hash bits select the first slot to probe.
            slot_off = start + (((h >> 8) % nslots) << 3)

            # Probe from that slot to the end of the table, then wrap
            # around to the beginning of the table.
            for pos in chain(xrange(slot_off, end, 8),
                             xrange(start, slot_off, 8)):
                rec_h, rec_pos = read_2_le4(self.data[pos:pos+8])

                # A slot whose stored hash is 0 ends the probe sequence.
                if not rec_h:
                    break
                elif rec_h == h:
                    klen, dlen = read_2_le4(self.data[rec_pos:rec_pos+8])
                    rec_pos += 8

                    # Same hash: confirm by comparing the stored key.
                    if self.data[rec_pos:rec_pos+klen] == key:
                        rec_pos += klen
                        yield self.data[rec_pos:rec_pos+dlen]

    def get(self, key, default=None):
        '''Get the first value for key, returning default if missing.'''
        # Avoid exception catch when handling default case; much faster.
        return chain(self.gets(key), (default,)).next()

    def getint(self, key, default=None, base=0):
        '''Get the first value for key converted it to an int, returning
        default if missing.'''
        value = self.get(key, default)
        if value is not default:
            return int(value, base)
        return value

    def getints(self, key, base=0):
        '''Yield values for key in insertion order after converting to int.'''
        return (int(v, base) for v in self.gets(key))

    def getstring(self, key, default=None, encoding='utf-8'):
        '''Get the first value for key decoded as unicode, returning default if
        not found.'''
        value = self.get(key, default)
        if value is not default:
            return value.decode(encoding)
        return value

    def getstrings(self, key, encoding='utf-8'):
        '''Yield values for key in insertion order after decoding as
        unicode.'''
        return (v.decode(encoding) for v in self.gets(key))


class Writer(object):
    '''Object for building new Constant Databases, and writing them to a
    seekable file-like object.'''

    def __init__(self, fp, hashfn=djb_hash):
        '''Create an instance writing to a file-like object, using hashfn to
        hash keys.'''
        self.fp = fp
        self.hashfn = hashfn

        # Reserve room for the index; finalize() seeks back and fills it in.
        fp.write('\x00' * 2048)
        # One list of (hash, record position) pairs per hash table.
        self._unordered = [[] for i in xrange(256)]

    def put(self, key, value=''):
        '''Write a string key/value pair to the output file.'''
        assert type(key) is str and type(value) is str

        # Remember where the record starts before writing it.
        pos = self.fp.tell()
        self.fp.write(write_2_le4(len(key), len(value)))
        self.fp.write(key)
        self.fp.write(value)

        h = self.hashfn(key) & 0xffffffff
        self._unordered[h & 0xff].append((h, pos))

    def puts(self, key, values):
        '''Write more than one value for the same key to the output file.
        Equivalent to calling put() in a loop.'''
        for value in values:
            self.put(key, value)

    def putint(self, key, value):
        '''Write an integer as a base-10 string associated with the given key
        to the output file.'''
        self.put(key, str(value))

    def putints(self, key, values):
        '''Write zero or more integers for the same key to the output file.
        Equivalent to calling putint() in a loop.'''
        self.puts(key, (str(value) for value in values))

    def putstring(self, key, value, encoding='utf-8'):
        '''Write a unicode string associated with the given key to the output
        file after encoding it as UTF-8 or the given encoding.'''
        self.put(key, unicode.encode(value, encoding))

    def putstrings(self, key, values, encoding='utf-8'):
        '''Write zero or more unicode strings to the output file. Equivalent to
        calling putstring() in a loop.'''
        self.puts(key, (unicode.encode(value, encoding) for value in values))

    def finalize(self):
        '''Write the final hash tables to the output file, and write out its
        index. The output file remains open upon return.'''
        index = []
        for tbl in self._unordered:
            # Twice as many slots as entries.
            length = len(tbl) << 1
            ordered = [(0, 0)] * length
            for pair in tbl:
                where = (pair[0] >> 8) % length
                # Take the first free slot, starting at the preferred one
                # and wrapping around to the beginning of the table.
                for i in chain(xrange(where, length), xrange(0, where)):
                    if not ordered[i][0]:
                        ordered[i] = pair
                        break

            index.append((self.fp.tell(), length))
            for pair in ordered:
                self.fp.write(write_2_le4(*pair))

        # Fill in the index space reserved by __init__.
        self.fp.seek(0)
        for pair in index:
            self.fp.write(write_2_le4(*pair))
        self.fp = None # prevent double finalize()
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/coverage_distributions.py Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,187 @@
#!/usr/bin/env python

# Build the "coverage distributions" composite dataset: run the external
# `coverage` program on a SNP table, plot its output with R and write an
# HTML info page describing the generated files.

import os
import errno
import sys
import shutil
import subprocess
from Population import Population
import gd_composite

################################################################################

def mkdir_p(path):
    """Create path and any missing parents; an existing path is not an error."""
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

def exit_if_failed(rc, args):
    """Report a failed external command on stderr and exit with status 1."""
    if rc != 0:
        print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args))
        sys.exit(1)

################################################################################

if len(sys.argv) < 7:
    print >> sys.stderr, "Usage"
    sys.exit(1)

input, data_source, output, extra_files_path = sys.argv[1:5]

individual_metadata = []
population_info = []
p1_input = None
all_individuals = False

# The remaining arguments are either the literal 'all_individuals' or
# prefixed values: 'individuals:<file>', 'population:<file>:<name>' and
# 'individual:<tag>'.
for arg in sys.argv[5:]:
    if arg == 'all_individuals':
        all_individuals = True
    elif len(arg) > 12 and arg[:12] == 'individuals:':
        p1_input = arg[12:]
    elif len(arg) > 11:
        if arg[:11] == 'population:':
            file, name = arg[11:].split(':', 1)
            population_info.append((file, name))
        elif arg[:11] == 'individual:':
            individual_metadata.append(arg[11:])

p_total = Population()
p_total.from_tag_list(individual_metadata)

################################################################################

mkdir_p(extra_files_path)

################################################################################

prog = 'coverage'

args = []
args.append(prog)
args.append(input)
args.append(data_source)

user_coverage_file = os.path.join(extra_files_path, 'coverage.txt')
args.append(user_coverage_file)

population_list = []

if all_individuals:
    tags = p_total.tag_list()
elif p1_input is not None:
    p1 = Population()
    this_pop = Population()
    this_pop.from_population_file(p1_input)
    population_list.append(this_pop)
    p1.from_population_file(p1_input)
    if not p_total.is_superset(p1):
        print >> sys.stderr, 'There is an individual in the population that is not in the SNP table'
        sys.exit(1)
    tags = p1.tag_list()
else:
    tags = []
    for population_file, population_name in population_info:
        population = Population()
        this_pop = Population()
        this_pop.from_population_file(population_file)
        population_list.append(this_pop)
        population.from_population_file(population_file)
        if not p_total.is_superset(population):
            print >> sys.stderr, 'There is an individual in the {0} population that is not in the SNP table'.format(population_name)
            sys.exit(1)
        columns = population.column_list()
        for column in columns:
            tags.append('{0}:{1}'.format(column, population_name))

for tag in tags:
    args.append(tag)

## text output
coverage_file = 'coverage.txt'
#print "args:", ' '.join(args)
with open(coverage_file, 'w') as fh:
    p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=fh, stderr=sys.stderr)
    rc = p.wait()
# Stop here instead of parsing the output of a failed run.
exit_if_failed(rc, args)

## graphical output
coverage2_file = 'coverage2.txt'

# Rewrite every row so that each column after the first holds the
# difference between consecutive values of the original row.
with open(coverage_file) as fh:
    with open(coverage2_file, 'w') as ofh:
        for line in fh:
            line = line.rstrip('\r\n')
            if not line:
                # nothing to convert on a blank line
                continue
            elems = line.split('\t')
            name = elems.pop(0)
            values = [ elems[0] ]
            for idx in range(1, len(elems)):
                val = str(float(elems[idx]) - float(elems[idx-1]))
                values.append(val)
            print >> ofh, '{0}\t{1}'.format(name, '\t'.join(values))

################################################################################

prog = 'R'

args = []
args.append(prog)
args.append('--vanilla')
args.append('--quiet')

_realpath = os.path.realpath(__file__)
_script_dir = os.path.dirname(_realpath)
r_script_file = os.path.join(_script_dir, 'coverage_plot.r')

# The R script is fed on stdin; R's stdout is discarded.
#print "args:", ' '.join(args)
with open(r_script_file) as ifh:
    with open(os.devnull, 'w') as ofh:
        p = subprocess.Popen(args, bufsize=-1, stdin=ifh, stdout=ofh, stderr=None)
        rc = p.wait()
exit_if_failed(rc, args)

pdf_file = os.path.join(extra_files_path, 'coverage.pdf')
shutil.copy2('coverage.pdf', pdf_file)
os.remove('coverage.pdf')
os.remove(coverage2_file)

################################################################################

info_page = gd_composite.InfoPage()
info_page.set_title('Coverage distributions Galaxy Composite Dataset')

display_file = gd_composite.DisplayFile()
display_value = gd_composite.DisplayValue()

out_pdf = gd_composite.Parameter(name='coverage.pdf', value='coverage.pdf', display_type=display_file)
out_txt = gd_composite.Parameter(name='coverage.txt', value='coverage.txt', display_type=display_file)

info_page.add_output_parameter(out_pdf)
info_page.add_output_parameter(out_txt)


if data_source == '0':
    data_source_value = 'sequence coverage'
elif data_source == '1':
    data_source_value = 'estimated genotype'
else:
    # Unknown code: show it as given rather than failing with a NameError.
    data_source_value = data_source

in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value)

info_page.add_input_parameter(in_data_source)

if population_list:
    misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList())
    info_page.add_misc(misc_populations)
else:
    misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList())
    info_page.add_misc(misc_individuals)




with open (output, 'w') as ofh:
    print >> ofh, info_page.render()


sys.exit(0)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/coverage_distributions.xml Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,75 @@
<tool id="gd_coverage_distributions" name="Coverage" version="1.0.0">
  <description>distributions</description>

  <!-- Runs coverage_distributions.py. The second argument (the data source)
       is fixed to "0". After the four positional arguments comes either
       "all_individuals", one "individuals:" argument or one "population:"
       argument per selected population, followed by one "individual:"
       argument per individual of the SNP table. -->
  <command interpreter="python">
    coverage_distributions.py "$input" "0" "$output" "$output.extra_files_path"
    #if $individuals.choice == '0'
      "all_individuals"
    #else if $individuals.choice == '1'
      #set $arg = 'individuals:%s' % str($individuals.p1_input)
      "$arg"
    #else if $individuals.choice == '2'
      #for $population in $individuals.populations
        #set $arg = 'population:%s:%s' % (str($population.p_input), str($population.p_input.name))
        "$arg"
      #end for
    #end if
    #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
      #set $individual_arg = 'individual:%s:%s' % ($individual_col, $individual)
      "$individual_arg"
    #end for
  </command>

  <inputs>
    <param name="input" type="data" format="wsf" label="SNP table" />

    <!-- Which individuals to report on; the choice value selects the
         branch taken in the command template. -->
    <conditional name="individuals">
      <param name="choice" type="select" label="Individuals">
        <option value="0" selected="true">All</option>
        <option value="1">Individuals in a population</option>
        <option value="2">Population totals</option>
      </param>
      <when value="0" />
      <when value="1">
        <param name="p1_input" type="data" format="ind" label="Population individuals" />
      </when>
      <when value="2">
        <repeat name="populations" title="Population" min="1">
          <param name="p_input" type="data" format="ind" label="individuals" />
        </repeat>
      </when>
    </conditional>

    <!-- Disabled data source selector; the command passes "0" instead. -->
    <!--
    <param name="data_source" type="select" label="Data source">
      <option value="0" selected="true">Sequence coverage</option>
      <option value="1">Genotype quality</option>
    </param>
    -->
  </inputs>

  <outputs>
    <data name="output" format="html" />
  </outputs>

  <tests>
    <test>
      <param name="input" value="genome_diversity/test_in/sample.wsf" ftype="wsf" />
      <param name="choice" value="0" />
      <output name="output" file="genome_diversity/test_out/coverage_distributions/coverage.html" ftype="html" compare="diff" lines_diff="2">
        <extra_files type="file" name='coverage.pdf' value="genome_diversity/test_out/coverage_distributions/coverage.pdf" compare="sim_size" delta = "1000"/>
        <extra_files type="file" name='coverage.txt' value="genome_diversity/test_out/coverage_distributions/coverage.txt" />
      </output>
    </test>
  </tests>

  <help>
**What it does**

This tool reports distributions of SNP reliability indicators for
individuals or populations. The reliability is measured by the sequence
coverage. Textual and graphical reports are generated, where the text
output gives the cumulative distributions.
  </help>
</tool>
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/coverage_plot.r Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,31 @@
# Plot one line per row of coverage2.txt into coverage.pdf.
# The first line of the table is skipped; column 1 of every remaining row
# is used as the legend label and the other columns are the plotted values.
x <- read.table('coverage2.txt', skip=1, sep='\t')

individuals <- dim(x)[1]
# x axis runs from 0 to max_cov: one position per value column
max_cov <- dim(x)[2] - 2
# largest value of the table (label column excluded), on the plotted scale
max_val <- max(x[-1]) / 100
colors <- rainbow(individuals)

line_width = 3
# transposed table: column i of xt is row i of x
xt = t(x)

xvals <- c(0:max_cov)
# row 1 without its label, divided by 100 (the y axis is labelled "Proportion")
values <- as.numeric(as.vector(xt[,1][-1]))/100

pdf(file='coverage.pdf', onefile=TRUE, width=10, height=6);

# the first row creates the plot, the remaining rows are added as lines
plot(xvals, values, type='l', ylim=c(0, max_val), xlim=c(0, max_cov), col=colors[1], lwd=line_width, xlab="Coverage", ylab="Proportion")

# guard: 2:individuals would count downwards when there is a single row
if (individuals > 1) {
    for (i in 2:individuals) {
        values <- as.numeric(as.vector(xt[,i][-1]))/100;
        lines(xvals, values, col=colors[i], lwd=line_width);
    }
}


# legend labels come from the first column of the table
names <- as.vector(t(x[1]))
legend(x='topright', legend=names, fill=colors, bty='n')

dev.off()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes_conf.xml Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,15 @@
<?xml version="1.0"?>
<datatypes>
  <!-- Python module providing the datatype classes registered below. -->
  <datatype_files>
    <datatype_file name="wsf.py"/>
  </datatype_files>
  <!-- Maps each file extension to its class in galaxy.datatypes.wsf. -->
  <registration>
    <datatype extension="bigwigpos" type="galaxy.datatypes.wsf:BigWigPos" mimetype="application/octet-stream" display_in_upload="true">
      <display file="display_applications/bigwig_pos.xml" />
    </datatype>
    <datatype extension="ind" type="galaxy.datatypes.wsf:Individuals" display_in_upload="true"/>
    <datatype extension="wped" type="galaxy.datatypes.wsf:Wped" display_in_upload="true"/>
    <datatype extension="wsf" type="galaxy.datatypes.wsf:SnpFile" display_in_upload="true"/>
    <datatype extension="wpf" type="galaxy.datatypes.wsf:SapFile" display_in_upload="true"/>
  </registration>
</datatypes>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/display_applications/bigwig_pos.xml Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,16 @@
<display id="ucsc_bigwig_pos" version="1.0.0" name="display at UCSC">
  <!-- Load links from file: one line to one link -->
  <dynamic_links from_file="tool-data/shared/ucsc/ucsc_build_sites.txt" skip_startswith="#" id="0" name="0">
    <!-- Define parameters by column from file, allow splitting on builds -->
    <dynamic_param name="site_id" value="0"/>
    <dynamic_param name="ucsc_link" value="1"/>
    <dynamic_param name="builds" value="2" split="True" separator="," />
    <!-- Filter out some of the links based upon matching site_id to a Galaxy application configuration parameter and by dataset dbkey -->
    <filter>${site_id in $APP.config.ucsc_display_sites}</filter>
    <filter>${dataset.dbkey in $builds}</filter>
    <!-- We define url and params as normal, but values defined in dynamic_param are available by specified name -->
    <!-- The ampersand separating the query parameters must be written as an entity reference to keep this file well-formed XML -->
    <url>${ucsc_link}db=${qp($bigwig_file.dbkey)}&amp;hgt.customText=${qp($track.url)}</url>
    <param type="data" name="bigwig_file" url="galaxy_${DATASET_HASH}.bigwig" strip_https="False" />
    <param type="template" name="track" viewable="True" strip_https="False">track type=bigWig name="${bigwig_file.name}" bigDataUrl=${bigwig_file.url} db=${bigwig_file.dbkey}</param>
  </dynamic_links>
</display>
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dpmix.py Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,163 @@
#!/usr/bin/env python

# Build the "dpmix" composite dataset: run the external `dpmix` program on
# a SNP table and three population files, plot its tabular output and
# write an HTML info page describing the run.

import errno
import sys
import os
import subprocess
from Population import Population
import gd_composite
from dpmix_plot import make_dpmix_plot

################################################################################

def mkdir_p(path):
    """Create path and any missing parents; an existing path is not an error."""
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

def run_program(prog, args, stdout_file=None, space_to_tab=False):
    """Run an external command and optionally save its cleaned-up stdout.

    prog         -- passed to Popen as `executable` (None: use args[0])
    args         -- the argument list, program name included
    stdout_file  -- when given, the non-blank stdout lines are written to
                    this file, stripped of surrounding whitespace
    space_to_tab -- replace every space of the saved lines with a tab

    The saved output is written before the exit status is checked.  On a
    non-zero exit status the command line and the captured stderr are
    printed to stderr and the script exits with status 1.
    """
    #print "args: ", ' '.join(args)
    p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (stdoutdata, stderrdata) = p.communicate()
    rc = p.returncode

    if stdout_file is not None:
        with open(stdout_file, 'w') as ofh:
            lines = stdoutdata.split('\n')
            for line in lines:
                line = line.strip()
                if line:
                    if space_to_tab:
                        line = line.replace(' ', '\t')
                    print >> ofh, line

    if rc != 0:
        print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args))
        print >> sys.stderr, stderrdata
        sys.exit(1)

################################################################################

# Twelve positional arguments plus at least one "column:individual" tag.
if len(sys.argv) < 14:
    print >> sys.stderr, "usage"
    sys.exit(1)

input, data_source, switch_penalty, ap1_input, ap2_input, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir = sys.argv[1:13]
individual_metadata = sys.argv[13:]

# Options of the dpmix program that this wrapper keeps fixed.
chrom = 'all'
analyze_additional = '0'
add_logs = '0'

population_list = []

p_total = Population()
p_total.from_tag_list(individual_metadata)

ap1 = Population(name='Ancestral population 1')
ap1.from_population_file(ap1_input)
population_list.append(ap1)
if not p_total.is_superset(ap1):
    print >> sys.stderr, 'There is an individual in ancestral population 1 that is not in the SNP table'
    sys.exit(1)

ap2 = Population(name='Ancestral population 2')
ap2.from_population_file(ap2_input)
population_list.append(ap2)
if not p_total.is_superset(ap2):
    print >> sys.stderr, 'There is an individual in ancestral population 2 that is not in the SNP table'
    sys.exit(1)

p = Population(name='Potentially admixed')
p.from_population_file(p_input)
population_list.append(p)
if not p_total.is_superset(p):
    print >> sys.stderr, 'There is an individual in the population that is not in the SNP table'
    sys.exit(1)

mkdir_p(output2_dir)

################################################################################
# Create tabular file
################################################################################

misc_file = os.path.join(output2_dir, 'misc.txt')

prog = 'dpmix'
args = [ prog ]
args.append(input)
args.append(ref_column)
args.append(chrom)
args.append(data_source)
args.append(add_logs)
args.append(switch_penalty)
args.append(analyze_additional)
args.append(misc_file)

# One "column:group:name" argument per individual; the group is 1 and 2
# for the ancestral populations and 0 for the potentially admixed one.
columns = ap1.column_list()
for column in columns:
    args.append('{0}:1:{1}'.format(column, ap1.individual_with_column(column).name))

columns = ap2.column_list()
for column in columns:
    args.append('{0}:2:{1}'.format(column, ap2.individual_with_column(column).name))

columns = p.column_list()
for column in columns:
    args.append('{0}:0:{1}'.format(column, p.individual_with_column(column).name))

run_program(None, args, stdout_file=output, space_to_tab=True)

################################################################################
# Create pdf file
################################################################################

pdf_file = os.path.join(output2_dir, 'dpmix.pdf')
make_dpmix_plot(dbkey, output, pdf_file, galaxy_data_index_dir)

################################################################################
# Create html
################################################################################

info_page = gd_composite.InfoPage()
info_page.set_title('dpmix Galaxy Composite Dataset')

display_file = gd_composite.DisplayFile()
display_value = gd_composite.DisplayValue()

out_pdf = gd_composite.Parameter(name='dpmix.pdf', value='dpmix.pdf', display_type=display_file)
out_misc = gd_composite.Parameter(name='misc.txt', value='misc.txt', display_type=display_file)

info_page.add_output_parameter(out_pdf)
info_page.add_output_parameter(out_misc)

if data_source == '0':
    data_source_value = 'sequence coverage'
elif data_source == '1':
    data_source_value = 'estimated genotype'
else:
    # Unknown code: show it as given rather than failing with a NameError.
    data_source_value = data_source

if analyze_additional == '0':
    analyze_additional_value = 'no'
elif analyze_additional == '1':
    analyze_additional_value = 'yes'
else:
    analyze_additional_value = analyze_additional

in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value)
in_switch_penalty = gd_composite.Parameter(description='Switch penalty', value=switch_penalty, display_type=display_value)
in_analyze_additional = gd_composite.Parameter(description='Also analyze random chromosome', value=analyze_additional_value, display_type=display_value)

info_page.add_input_parameter(in_data_source)
info_page.add_input_parameter(in_switch_penalty)
info_page.add_input_parameter(in_analyze_additional)

misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList())

info_page.add_misc(misc_populations)

with open(output2, 'w') as ofh:
    print >> ofh, info_page.render()

sys.exit(0)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dpmix.xml Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,67 @@ +<tool id="gd_dpmix" name="Admixture" version="1.0.0"> + <description>using dynamic programming</description> + + <command interpreter="python"> + dpmix.py "$input" "$data_source" "$switch_penalty" "$ap1_input" "$ap2_input" "$p_input" "$output" "$output2" "$output2.extra_files_path" "$input.dataset.metadata.dbkey" "$input.dataset.metadata.ref" "$GALAXY_DATA_INDEX_DIR" + #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns) + #set $arg = '%s:%s' % ($individual_col, $individual) + "$arg" + #end for + </command> + + <inputs> + <param name="input" type="data" format="wsf" label="Dataset"> + <validator type="unspecified_build" message="This dataset does not have a reference species and cannot be used with this tool" /> + </param> + <param name="ap1_input" type="data" format="ind" label="Ancestral population 1 individuals" /> + <param name="ap2_input" type="data" format="ind" label="Ancestral population 2 individuals" /> + <param name="p_input" type="data" format="ind" label="Potentially admixed individuals" /> + + <param name="data_source" type="select" format="integer" label="Data source"> + <option value="0" selected="true">sequence coverage</option> + <option value="1">estimated genotype</option> + </param> + + <param name="switch_penalty" type="integer" min="0" value="10" label="Switch penalty" /> + </inputs> + + <outputs> + <data name="output" format="tabular" /> + <data name="output2" format="html" /> + </outputs> + + <tests> + <test> + <param name="input" value="genome_diversity/test_in/sample.wsf" ftype="wsf" /> + <param name="ap1_input" value="genome_diversity/test_in/a.ind" ftype="ind" /> + <param name="ap2_input" value="genome_diversity/test_in/b.ind" ftype="ind" /> + <param name="p_input" value="genome_diversity/test_in/c.ind" ftype="ind" /> + <param name="data_source" value="0" 
/> + <param name="switch_penalty" value="10" /> + + <output name="output" file="genome_diversity/test_out/dpmix/dpmix.tabular" /> + + <output name="output2" file="genome_diversity/test_out/dpmix/dpmix.html" ftype="html" compare="diff" lines_diff="2"> + <extra_files type="file" name='dpmix.pdf' value="genome_diversity/test_out/dpmix/dpmix.pdf" compare="sim_size" delta = "10000" /> + <extra_files type="file" name='misc.txt' value="genome_diversity/test_out/dpmix/misc.txt" /> + </output> + </test> + </tests> + + <help> +**What it does** + +The user specifies two "ancestral" populations (i.e., sources for +chromosomes) and a set of potentially admixed individuals, and chooses +between the sequence coverage or the estimated genotypes to measure +the similarity of genomic intervals in admixed individuals to the two +classes of ancestral chromosomes. The user also picks a "switch penalty", +typically between 10 and 100. For each potentially admixed individual, +the program divides the genome into three "genotypes": (0) homozygous +for the second ancestral population (i.e., both chromosomes from that +population), (1) heterozygous, or (2) homozygous for the first ancestral +population. Smaller values of the switch penalty (corresponding to more +ancient admixture events) generally lead to the reconstruction of more +frequent changes between genotypes. + </help> +</tool>
#!/usr/bin/env python
"""dpmix_plot.py -- draw dpmix interval output as a PDF.

The input is a whitespace-separated table with one interval per line::

    chrom  begin  end  state  individual

For every chromosome a title row is drawn, followed by one horizontal
track per individual that has data on that chromosome.  Intervals are
coloured by state: 0 is red, 2 is green and 1 is split (red on top,
green below).
"""

import os
import sys
import math
import matplotlib as mpl
mpl.use('PDF')
import matplotlib.pyplot as plt
from matplotlib.path import Path
import matplotlib.patches as patches

################################################################################

def build_chrom_len_dict(dbkey, galaxy_data_index_dir):
    """Return {chromosome: length} read from the build's ``.len`` file.

    The file is looked up at
    ``<galaxy_data_index_dir>/shared/ucsc/chrom/<dbkey>.len``; only lines
    with exactly two whitespace-separated fields are used.

    This is a best-effort lookup: if the file cannot be read, or a length
    is not an integer, whatever was collected so far is returned (the
    caller falls back to lengths derived from the data).
    """
    chrom_len_root = os.path.join(galaxy_data_index_dir, 'shared/ucsc/chrom')
    chrom_len_path = os.path.join(chrom_len_root, '{0}.len'.format(dbkey))

    chrom_len = {}

    try:
        with open(chrom_len_path) as fh:
            for line in fh:
                elems = line.split()
                if len(elems) == 2:
                    chrom_len[elems[0]] = int(elems[1])
    except (IOError, OSError, ValueError):
        # Missing/unreadable file or malformed length: keep what we have.
        pass

    return chrom_len

def parse_input_file(input_file):
    """Parse the interval table.

    Returns a 4-tuple ``(chroms, individuals, data, chrom_len)``:

    * chroms      -- chromosome names in order of first appearance
    * individuals -- individual ids in order of first appearance
    * data        -- ``data[chrom][individual]`` is a list of
                     ``(begin, end, state)`` integer tuples
    * chrom_len   -- largest ``end`` seen per chromosome (never below 0)

    Blank lines are ignored.
    """
    chroms = []
    individuals = []
    data = {}
    chrom_len = {}

    with open(input_file) as fh:
        for line in fh:
            line = line.strip()
            if not line:
                continue

            elems = line.split()
            chrom = elems[0]
            p1, p2, state = map(int, elems[1:4])
            individual = elems[4]

            if chrom not in chroms:
                chroms.append(chrom)

            if individual not in individuals:
                individuals.append(individual)

            data.setdefault(chrom, {}).setdefault(individual, []).append((p1, p2, state))
            chrom_len[chrom] = max(chrom_len.get(chrom, 0), p2)

    return chroms, individuals, data, chrom_len

def check_chroms(chroms, chrom_len, dbkey):
    """Exit with status 1 if any chromosome in *chroms* has no known length.

    Every missing chromosome is reported on stderr before exiting.
    """
    error = False
    for chrom in chroms:
        if chrom not in chrom_len:
            sys.stderr.write("Can't find length for {0} chromosome {1}\n".format(dbkey, chrom))
            error = True
    if error:
        sys.exit(1)

def check_data(data, chrom_len, dbkey):
    """Validate every interval; exit with status 1 if any is bad.

    An interval is bad when ``begin >= end`` or when it lies outside
    ``[0, chrom_len[chrom]]``.  All problems are reported on stderr
    (including the individual the line belongs to) before exiting.
    """
    error = False
    for chrom in data:
        chrom_beg = 0
        chrom_end = chrom_len[chrom]
        for individual in data[chrom]:
            for p1, p2, state in data[chrom][individual]:
                if p1 >= p2:
                    sys.stderr.write("Bad data line: begin >= end: {0} {1} {2} {3} {4}\n".format(chrom, p1, p2, state, individual))
                    error = True
                if p1 < chrom_beg or p2 > chrom_end:
                    sys.stderr.write("Bad data line: outside {0} boundaries[{1} - {2}]: {3} {4} {5} {6} {7}\n".format(dbkey, chrom_beg, chrom_end, chrom, p1, p2, state, individual))
                    error = True
    if error:
        sys.exit(1)

def make_rectangle(p1, p2, color, bottom=0.0, top=1.0):
    """Return a borderless filled patch spanning x in [p1, p2], y in [bottom, top]."""
    verts = [
        (p1, bottom),   # left, bottom
        (p1, top),      # left, top
        (p2, top),      # right, top
        (p2, bottom),   # right, bottom
        (0.0, 0.0)      # ignored
    ]

    codes = [
        Path.MOVETO,
        Path.LINETO,
        Path.LINETO,
        Path.LINETO,
        Path.CLOSEPOLY
    ]

    path = Path(verts, codes)
    return patches.PathPatch(path, facecolor=color, lw=0)

def make_split_rectangle(p1, p2, top_color, bottom_color):
    """Return two patches: the lower half in *bottom_color*, the upper in *top_color*."""
    patch1 = make_rectangle(p1, p2, bottom_color, top=0.5)
    patch2 = make_rectangle(p1, p2, top_color, bottom=0.5)
    return [patch1, patch2]

def make_state_rectangle(p1, p2, state, chrom, individual):
    """Return the list of patches that draw one interval for *state*.

    State 0 is a red bar, state 2 a green bar and state 1 a bar split
    red (top) / green (bottom).  Any other state is reported on stderr
    and the program exits with status 1; *chrom* and *individual* are
    only used for that message.
    """
    if state == 0:
        return [ make_rectangle(p1, p2, 'r') ]
    elif state == 1:
        return make_split_rectangle(p1, p2, 'r', 'g')
    elif state == 2:
        return [ make_rectangle(p1, p2, 'g') ]
    else:
        sys.stderr.write("Unknown state: {0}: {1} {2} {3} {4} {5}\n".format(state, chrom, p1, p2, state, individual))
        sys.exit(1)

def nicenum(num, round=False):
    """Return a "nice" number (1, 2, 5 or 10 times a power of ten) near *num*.

    With ``round=True`` the nearest nice number is chosen; otherwise the
    smallest nice number that is >= *num*.  ``num == 0`` yields 0.0.
    """
    if num == 0:
        return 0.0

    exp = int(math.floor(math.log10(num)))
    f = num / math.pow(10, exp)

    if round:
        if f < 1.5:
            nf = 1.0
        elif f < 3.0:
            nf = 2.0
        elif f < 7.0:
            nf = 5.0
        else:
            nf = 10.0
    else:
        if f <= 1.0:
            nf = 1.0
        elif f <= 2.0:
            nf = 2.0
        elif f <= 5.0:
            nf = 5.0
        else:
            nf = 10.0

    return nf * pow(10, exp)

def tick_foo(beg, end, loose=False):
    """Return ``(vals, labels)`` for about ten evenly spaced axis ticks.

    *vals* are integer tick positions between *beg* and *end* (the first
    position is dropped); *labels* are the positions divided by the
    power of ten of the tick spacing, as integer strings.  With
    ``loose=True`` the range is first widened to multiples of the tick
    spacing.  An empty range (``end == beg``) yields no ticks.
    """
    ntick = 10

    span = nicenum(end - beg, round=False)
    d = nicenum(span/(ntick - 1), round=True)
    if d == 0:
        # Zero-length range: log10(0) below would raise.
        return [], []
    digits = int(math.floor(math.log10(d)))

    if loose:
        graph_min = math.floor(beg/d) * d
        graph_max = math.ceil(end/d) * d
        stop = graph_max + (0.5 * d)
    else:
        graph_min = beg
        graph_max = end
        stop = graph_max

    vals = []
    x = graph_min
    while x <= stop:
        vals.append(int(x))
        x += d

    # The tick at the start of the range is not drawn.
    vals = vals[1:]

    scale = math.pow(10, digits)
    labels = ['{0}'.format(int(val/scale)) for val in vals]

    return vals, labels

def _count_present(individuals, chrom_data):
    """Return how many of *individuals* have an entry in *chrom_data*."""
    return sum(1 for individual in individuals if individual in chrom_data)

################################################################################

def make_dpmix_plot(input_dbkey, input_file, output_file, galaxy_data_index_dir):
    """Read *input_file* and save the interval plot to *output_file*.

    Chromosome lengths come from the ``.len`` file for *input_dbkey*
    under *galaxy_data_index_dir* when available, otherwise from the
    largest interval end in the data.  Exits with status 1 on invalid
    data (see ``check_data``).
    """
    fs_chrom_len = build_chrom_len_dict(input_dbkey, galaxy_data_index_dir)
    chroms, individuals, data, chrom_len = parse_input_file(input_file)

    # Prefer the reference lengths over lengths inferred from the data.
    for chrom in list(chrom_len):
        if chrom in fs_chrom_len:
            chrom_len[chrom] = fs_chrom_len[chrom]

    #check_chroms(chroms, chrom_len, input_dbkey)
    check_data(data, chrom_len, input_dbkey)

    ## units below are inches
    top_space = 0.10
    chrom_space = 0.25
    chrom_height = 0.25
    ind_space = 0.10
    ind_height = 0.25

    # First pass: total figure height.
    total_height = 0.0
    at_top = True
    for chrom in chroms:
        if at_top:
            total_height += (top_space + chrom_height)
            at_top = False
        else:
            total_height += (top_space + chrom_space + chrom_height)

        total_height += _count_present(individuals, data[chrom]) * (ind_space + ind_height)

    width = 7.5
    height = math.ceil(total_height)

    # Axes are placed top-down in figure-relative coordinates.
    bottom = 1.0

    fig = plt.figure(figsize=(width, height))

    at_top = True
    # Layout toggle: when True, chromosome titles, individual labels and
    # tick marks are all suppressed.
    for_webb = False

    for chrom in chroms:
        length = chrom_len[chrom]
        vals, labels = tick_foo(0, length)

        if at_top:
            bottom -= (top_space + chrom_height)/height
            at_top = False
        else:
            bottom -= (top_space + chrom_space + chrom_height)/height

        if not for_webb:
            # Chromosome title row.
            fig.add_axes([0.0, bottom, 1.0, chrom_height/height])
            plt.axis('off')
            plt.text(0.5, 0.5, chrom, fontsize=14, ha='center')

        individual_count = _count_present(individuals, data[chrom])

        i = 0
        for individual in individuals:
            if individual not in data[chrom]:
                continue
            i += 1

            bottom -= (ind_space + ind_height)/height
            if not for_webb:
                # [left, bottom, width, height]
                fig.add_axes([0.0, bottom, 0.09, ind_height/height])
                plt.axis('off')
                plt.text(1.0, 0.5, individual, fontsize=10, ha='right', va='center')
            # [left, bottom, width, height]
            ax2 = fig.add_axes([0.10, bottom, 0.88, ind_height/height], frame_on=False)
            ax2.set_xlim(0, length)
            ax2.set_ylim(0, 1)
            # Only the last track of a chromosome carries the x axis.
            if i != individual_count or for_webb:
                plt.axis('off')
            else:
                ax2.tick_params(top=False, left=False, right=False, labelleft=False)
                ax2.set_xticks(vals)
                ax2.set_xticklabels(labels)
            for p1, p2, state in sorted(data[chrom][individual]):
                for patch in make_state_rectangle(p1, p2, state, chrom, individual):
                    ax2.add_patch(patch)

    plt.savefig(output_file)

################################################################################

if __name__ == '__main__':
    input_dbkey, input_file, output_file, galaxy_data_index_dir = sys.argv[1:5]
    make_dpmix_plot(input_dbkey, input_file, output_file, galaxy_data_index_dir)
    sys.exit(0)
#!/usr/bin/env bash
#
# echo.bash -- append each individual given on the command line to the
# output file, one per line.
#
# usage: echo.bash input output individual [individual ...]
#
# The first argument (input) is accepted but never read by this script.

if [ $# -lt 3 ]; then
    echo "usage: $(basename "$0") input output individual [individual ...]" >&2
    exit 1
fi

output="$2"
shift 2

for individual in "$@"; do
    # printf, not echo: echo would treat names such as "-n" or "-e" as options.
    printf '%s\n' "$individual" >> "$output"
done

exit 0
#!/usr/bin/env bash
#
# evaluate_population_numbers.bash -- run "admixture --cv" on a ped file
# for every population count from 1 to max_populations and append the
# lines of its output that contain "CV" to the output file.
#
# usage: evaluate_population_numbers.bash input_ped_file output_file max_populations

if [ $# -ne 3 ]; then
    echo "usage: $(basename "$0") input_ped_file output_file max_populations" >&2
    exit 1
fi

input_ped_file="$1"
output_file="$2"
max_populations="$3"

# The value is used in an arithmetic context below, which would evaluate
# arbitrary expressions; accept plain decimal digits only.
case "$max_populations" in
    ''|*[!0-9]*)
        echo "error: max_populations must be a non-negative integer: $max_populations" >&2
        exit 1
        ;;
esac
# Force base 10 so a value with leading zeros is not read as octal.
max_populations=$((10#$max_populations))

ADMIXTURE=admixture

for (( i = 1; i <= max_populations; i++ )); do
    "$ADMIXTURE" --cv "$input_ped_file" "$i" 2>&1 | grep CV >> "$output_file"
done
#!/usr/bin/env python2.5
"""extract_flanking_dna.py -- write the DNA flanking each selected SNP.

For every (sequence, position) pair read from the input SNP file, the
flanking DNA is looked up in the species' snpcalls file (located through
the given .loc file, with a ".cdb" index next to it) and written to the
output file in the requested format.
"""

import os
import sys
import traceback
from optparse import OptionParser
import genome_diversity as gd

# Options that must be present (and truthy), in the order they are checked.
REQUIRED_OPTIONS = ( 'input', 'output', 'snps_loc', 'scaffold_col', 'pos_col', 'output_format', 'species' )

def main_function( parse_arguments=None ):
    """Return a decorator that turns ``f( options, arguments )`` into a script entry point.

    The decorated function takes an optional argument list (default
    ``sys.argv``), parses it with *parse_arguments* and always ends by
    calling ``sys.exit`` with the wrapped function's return value.  If
    the wrapped function raises, the error and a traceback are written
    to stderr and the exit status is 1.
    """
    if parse_arguments is None:
        parse_arguments = lambda arguments: ( None, arguments )
    def main_decorator( to_decorate ):
        def decorated_main( arguments=None ):
            if arguments is None:
                arguments = sys.argv
            options, arguments = parse_arguments( arguments )
            rc = 1
            try:
                rc = to_decorate( options, arguments )
            except Exception:
                # sys.exc_info() instead of "except ... as/," keeps this
                # line valid on every Python version.
                err = sys.exc_info()[1]
                sys.stderr.write( 'ERROR: %s\n' % str( err ) )
                traceback.print_exc()
            finally:
                sys.exit( rc )
        return decorated_main
    return main_decorator

def parse_arguments( arguments ):
    """Parse *arguments* (argv-style; element 0 is skipped) into ``( options, args )``."""
    parser = OptionParser()
    parser.add_option('--input',
                        type='string', dest='input',
                        help='file of selected SNPs')
    parser.add_option('--output',
                        type='string', dest='output',
                        help='output file')
    parser.add_option('--snps_loc',
                        type='string', dest='snps_loc',
                        help='snps .loc file')
    parser.add_option('--scaffold_col',
                        type="int", dest='scaffold_col',
                        help='scaffold column in the input file')
    parser.add_option('--pos_col',
                        type="int", dest='pos_col',
                        help='position column in the input file')
    parser.add_option('--output_format',
                        type="string", dest='output_format',
                        help='output format, fasta or primer3')
    parser.add_option('--species',
                        type="string", dest='species',
                        help='species')
    return parser.parse_args( arguments[1:] )


@main_function( parse_arguments )
def main( options, arguments ):
    """Extract flanking DNA for every SNP in ``--input`` into ``--output``.

    Raises RuntimeError naming the first required option that is missing.
    """
    for name in REQUIRED_OPTIONS:
        if not getattr( options, name ):
            raise RuntimeError( 'missing --%s option' % name )

    snps = gd.SnpFile( filename=options.input, seq_col=int( options.scaffold_col ), pos_col=int( options.pos_col ) )

    out_fh = gd._openfile( options.output, 'w' )
    try:
        snpcalls_file = gd.get_filename_from_loc( options.species, options.snps_loc )
        # The index lives next to the data file, with a .cdb extension.
        file_root, file_ext = os.path.splitext( snpcalls_file )
        snpcalls_index_file = file_root + ".cdb"
        snpcalls = gd.SnpcallsFile( data_file=snpcalls_file, index_file=snpcalls_index_file )

        while snps.next():
            seq, pos = snps.get_seq_pos()
            flanking_dna = snpcalls.get_flanking_dna( sequence=seq, position=pos, format=options.output_format )
            if flanking_dna:
                out_fh.write( flanking_dna )
    finally:
        # Close the output even when a lookup fails part-way through.
        out_fh.close()

if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extract_flanking_dna.xml Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,93 @@ +<tool id="gd_extract_flanking_dna" name="Extract" version="1.0.0"> + <description>DNA flanking chosen SNPs</description> + + <command interpreter="python"> + extract_flanking_dna.py "--input=$input" "--output=$output" "--snps_loc=${GALAXY_DATA_INDEX_DIR}/gd.snps.loc" + #if $override_metadata.choice == "0": + "--scaffold_col=${input.metadata.scaffold}" "--pos_col=${input.metadata.pos}" "--species=${input.metadata.species}" + #else + "--scaffold_col=$scaf_col" "--pos_col=$pos_col" "--species=$species" + #end if + "--output_format=$output_format" + </command> + + <inputs> + <param format="tabular" name="input" type="data" label="Selected SNPS dataset"/> + <param name="output_format" type="select" format="integer" label="output format"> + <option value="fasta" selected="true">FastA format</option> + <option value="primer3">Primer3 input</option> + </param> + <conditional name="override_metadata"> + <param name="choice" type="select" format="integer" label="choose columns"> + <option value="0" selected="true">No, get columns from metadata</option> + <option value="1" >Yes, choose columns</option> + </param> + <when value="0" /> + <when value="1"> + <param name="scaf_col" type="data_column" data_ref="input" numerical="false" label="Column with scaffold"/> + <param name="pos_col" type="data_column" data_ref="input" numerical="true" label="Column with position"/> + <param name="species" type="select" label="Choose species"> + <options from_file="gd.species.txt"> + <column name="name" index="1"/> + <column name="value" index="0"/> + </options> + </param> + </when> + </conditional> + </inputs> + + <outputs> + <data format="txt" name="output"/> + </outputs> + + <!-- Need snpcalls files from Webb before uncommenting + <tests> + <test> + <param name="input" value="genome_diversity/test_out/select_snps/select_snps.wsf" ftype="wsf" /> + <param 
name="output_format" value="primer3" /> + <param name="choice" value="0" /> + <output name="output" file="genome_diversity/test_out/extract_flanking_dna/extract_flanking_dna.txt" /> + </test> + </tests> + --> + + <help> +**What it does** + + It reports a DNA segment containing each SNP, with up to 200 nucleotides on + either side of the SNP position, which is indicated by "n". Fewer nucleotides + are reported if the SNP is near an end of the assembled genome fragment. + +----- + +**Example** + +- input file:: + + chr2_75111355_75112576 314 A C L F chr2 75111676 C F 15 4 53 2 9 48 Y 96 0.369 0.355 0.396 0 + chr8_93901796_93905612 2471 A C A A chr8 93904264 A A 8 0 51 10 2 14 Y 961 0.016 0.534 0.114 2 + chr10_7434473_7435447 524 T C S S chr10 7435005 T S 11 5 90 14 0 69 Y 626 0.066 0.406 0.727 0 + chr14_80021455_80022064 138 G A H H chr14 80021593 G H 14 0 69 9 6 124 Y 377 0.118 0.997 0.195 1 + chr15_64470252_64471048 89 G A Y Y chr15 64470341 G Y 5 6 109 14 0 69 Y 312 0.247 0.998 0.393 0 + chr18_48070585_48071386 514 C T E K chr18 48071100 T K 7 7 46 14 0 69 Y 2 0.200 0.032 0.163 0 + chr18_50154905_50155664 304 A G Y C chr18 50155208 A Y 4 2 17 5 1 22 Y 8 0.022 0.996 0.128 0 + chr18_57379354_57380496 315 C T V V chr18 57379669 G V 11 0 60 9 6 62 Y 726 0.118 0.048 0.014 1 + chr19_14240610_14242055 232 C T A V chr19 14240840 C A 18 8 56 15 5 42 Y 73 0.003 0.153 0.835 0 + chr19_39866997_39874915 3117 C T P P chr19 39870110 C P 3 7 65 14 2 32 Y 6 0.321 0.911 0.462 4 + etc. 
+ +- output file:: + + > chr2_75111355_75112576 314 A C + TATCTTCATTTTTATTATAGACTCTCTGAACCAATTTGCCCTGAGGCAGACTTTTTAAAGTACTGTGTAATGTATGAAGTCCTTCTGCTCAAGCAAATCATTGGCATGAAAACAGTTGCAAACTTATTGTGAGAGAAGAGTCCAAGAGTTTTAACAGTCTGTAAGTATATAGCCTGTGAGTTTGATTTCCTTCTTGTTTTTnTTCCAGAAACATGATCAGGGGCAAGTTCTATTGGATATAGTCTTCAAGCATCTTGATTTGACTGAGCGTGACTATTTTGGTTTGCAGTTGACTGACGATTCCACTGATAACCCAGTAAGTTTAAGCTGTTGTCTTTCATTGTCATTGCAATTTTTCTGTCTTTATACTAGGTCCTTTCTGATTTACATTGTTCACTGATT + > chr8_93901796_93905612 2471 A C + GCTGCCGCTGGATTTACTTCTGCTTGGGTCGAGAGCGGGCTGGATGGGTGAAGAGTGGGCTCCCCGGCCCCTGACCAGGCAGGTGCAGACAAGTCGGAAGAAGGCCCGCCGCATCTCCTTGCTGGCCAGCGTGTAGATGACGGGGTTCATGGCAGAGTTGAGCACGGCCAGCACGATGAACCACTGGGCCTTGAACAGGATnGCGCACTCCTTCACCTTGCAGGCCACATCCACAAGGAAAAGGATGAAGAGTGGGGACCAGCAGGCGATGAACACGCTCACCACGATCACCACGGTCCGCAGCAGGGCCATGGACCGCTCTGAGTTGTGCGGGCTGGCCACCCTGCGGCTGCTGGACTTCACCAGGAAGTAGATGCGTGCGTACAGGATCACGATGGTCAC + > chr10_7434473_7435447 524 T C + ATTATTAACAGAAACATTTCTTTTTCATTACCCAGGGGTTACACTGGTCGTTGATGTTAATCAGTTTTTGGAGAAGGAGAAGCAAAGTGATATTTTGTCTGTTCTGAAGCCTGCCGTTGGTAATACAAATGACGTAATCCCTGAATGTGCTGACAGGTACCATGACGCCCTGGCAAAAGCAAAAGAGCAAAAATCTAGAAGnGGTAAGCATCTTCACTGTTTAGCACAAATTAAATAGCACTTTGAATATGATGATTTCTGTGGTATTGTGTTATCTTACTTTTGAGACAAATAATCGCTTTCAAATGAATATTTCTGAATGTTTGTCATCTCTGGCAAGGAAATTTTTTAGTGTTTCTTTTCCTTTTTTGTCTTTTGGAAATCTGTGATTAACTTGGTGGC + > chr14_80021455_80022064 138 G A + ACCCAGGGATCAAACCCAGGTCTCCCGCATTGCAGGCGGATTCTTTACTGTCTGAGCCTCCAGGGAAGCCCTCGGGGCTGAAGGGATGGTTATGAAGGTGAGAAACAGGGGCCACCTGTCCCCAAGGTACCTTGCGACnTGCCATCTGCGCTCCACCAGTAAATGGACGTCTTCGATCCTTCTGTTGTTGGCGTAGTGCAAACGTTTGGGAAGGTGCTGTTTCAAGTAAGGCTTAAAGTGCTGGTCTGGTTTTTTACACTGAAATATAAATGGACATTGGATTTTGCAATGGAGAGTCTTCTAGAAGAGTCCAAGACATTCTCTCCAGAAAGCTGAAGG + > chr15_64470252_64471048 89 G A + 
TGTGTGTGTGTGTGTGTGTGTGTGCCTGTGTCTGTACATGCACACCACGTGGCCTCACCCAGTGCCCTCAGCTCCATGGTGATGTCCACnTAGCCGTGCTCCGCGCTGTAGTACATGGCCTCCTGGAGGGCCTTGGTGCGCGTCCGGCTCAGGCGCATGGGCCCCTCGCTGCCGCTGCCCTGGCTGGATGCATCGCTCTCTTCCACGCCCTCAGCCAGGATCTCCTCCAGGGACAGCACATCTGCTTTGGCCTGCTGTGGCTGAGTCAGGAGCTTCCTCAGGACGTTCCT + etc. + </help> +</tool>
#!/usr/bin/env python2.5
"""extract_primers.py -- write the primer entry for each selected SNP.

For every (sequence, position) pair read from the input SNP file, the
matching entry is looked up in the species' primers file (located
through the given .loc file, with a ".cdb" index next to it) and
written to the output file.
"""

import os
import sys
import traceback
from optparse import OptionParser
import genome_diversity as gd

# Options that must be present (and truthy), in the order they are checked.
REQUIRED_OPTIONS = ( 'input', 'output', 'primers_loc', 'scaffold_col', 'pos_col', 'species' )

def main_function( parse_arguments=None ):
    """Return a decorator that turns ``f( options, arguments )`` into a script entry point.

    The decorated function takes an optional argument list (default
    ``sys.argv``), parses it with *parse_arguments* and always ends by
    calling ``sys.exit`` with the wrapped function's return value.  If
    the wrapped function raises, the error and a traceback are written
    to stderr and the exit status is 1.
    """
    if parse_arguments is None:
        parse_arguments = lambda arguments: ( None, arguments )
    def main_decorator( to_decorate ):
        def decorated_main( arguments=None ):
            if arguments is None:
                arguments = sys.argv
            options, arguments = parse_arguments( arguments )
            rc = 1
            try:
                rc = to_decorate( options, arguments )
            except Exception:
                # sys.exc_info() instead of "except ... as/," keeps this
                # line valid on every Python version.
                err = sys.exc_info()[1]
                sys.stderr.write( 'ERROR: %s\n' % str( err ) )
                traceback.print_exc()
            finally:
                sys.exit( rc )
        return decorated_main
    return main_decorator

def parse_arguments( arguments ):
    """Parse *arguments* (argv-style; element 0 is skipped) into ``( options, args )``."""
    parser = OptionParser()
    parser.add_option('--input',
                        type='string', dest='input',
                        help='file of selected SNPs')
    parser.add_option('--output',
                        type='string', dest='output',
                        help='output file')
    parser.add_option('--primers_loc',
                        type='string', dest='primers_loc',
                        help='primers .loc file')
    parser.add_option('--scaffold_col',
                        type="int", dest='scaffold_col',
                        help='scaffold column in the input file')
    parser.add_option('--pos_col',
                        type="int", dest='pos_col',
                        help='position column in the input file')
    parser.add_option('--species',
                        type="string", dest='species',
                        help='species')
    return parser.parse_args( arguments[1:] )


@main_function( parse_arguments )
def main( options, arguments ):
    """Extract the primer entry for every SNP in ``--input`` into ``--output``.

    Raises RuntimeError naming the first required option that is missing.
    """
    for name in REQUIRED_OPTIONS:
        if not getattr( options, name ):
            raise RuntimeError( 'missing --%s option' % name )

    snps = gd.SnpFile( filename=options.input, seq_col=int( options.scaffold_col ), pos_col=int( options.pos_col ) )

    out_fh = gd._openfile( options.output, 'w' )
    try:
        primer_data_file = gd.get_filename_from_loc( options.species, options.primers_loc )

        # The index lives next to the data file, with a .cdb extension.
        file_root, file_ext = os.path.splitext( primer_data_file )
        primer_index_file = file_root + ".cdb"
        primers = gd.PrimersFile( data_file=primer_data_file, index_file=primer_index_file )

        while snps.next():
            seq, pos = snps.get_seq_pos()
            primer = primers.get_entry( seq, pos )
            if primer:
                out_fh.write( primer )
    finally:
        # Close the output even when a lookup fails part-way through.
        out_fh.close()

if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extract_primers.xml Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,88 @@ +<tool id="gd_extract_primers" name="Extract primers" version="1.0.0"> + <description>for selected SNPs</description> + + <command interpreter="python"> + extract_primers.py "--input=$input" "--output=$output" "--primers_loc=${GALAXY_DATA_INDEX_DIR}/gd.primers.loc" + #if $override_metadata.choice == "0": + "--scaffold_col=${input.metadata.scaffold}" "--pos_col=${input.metadata.pos}" "--species=${input.metadata.species}" + #else + "--scaffold_col=$scaf_col" "--pos_col=$pos_col" "--species=$species" + #end if + </command> + + <inputs> + <param format="tabular" name="input" type="data" label="Selected SNPS dataset"/> + <conditional name="override_metadata"> + <param name="choice" type="select" format="integer" label="choose columns"> + <option value="0" selected="true">No, get columns from metadata</option> + <option value="1" >Yes, choose columns</option> + </param> + <when value="0" /> + <when value="1"> + <param name="scaf_col" type="data_column" data_ref="input" numerical="false" label="Column with scaffold"/> + <param name="pos_col" type="data_column" data_ref="input" numerical="true" label="Column with position"/> + <param name="species" type="select" label="Choose species"> + <options from_file="gd.species.txt"> + <column name="name" index="1"/> + <column name="value" index="0"/> + </options> + </param> + </when> + </conditional> + </inputs> + + <outputs> + <data format="txt" name="output"/> + </outputs> + + <tests> + <test> + <param name="input" value="genome_diversity/test_out/select_snps/select_snps.wsf" ftype="wsf" /> + <param name="choice" value="0"/> + <output name="output" file="genome_diversity/test_out/extract_primers/extract_primers.txt" /> + </test> + </tests> + + + <help> +**What it does** + + This tool extracts primers for SNPs in the dataset using the Primer3 program. 
+ The first line of output for a given SNP reports the name of the assembled + contig, the SNP's position in the contig, the two variant nucleotides, and + Primer3's "pair penalty". The next line, if not blank, names restriction + enzymes (from the user-adjustable list) that differentially cut at that + site, but do not cut at any other position between and including the + primer positions. The next lines show the SNP's flanking regions, with + the SNP position indicated by "n", including the primer positions and an + additional 3 nucleotides. + +----- + +**Example** + +- input file:: + + chr5_30800874_30802049 734 G A chr5 30801606 A 24 0 99 4 11 97 Y 496 0.502 0.033 0.215 6 + chr8_55117827_55119487 994 A G chr8 55118815 G 25 0 102 4 11 96 Y 22 0.502 0.025 2.365 1 + chr9_100484836_100485311 355 C T chr9 100485200 T 27 0 108 6 17 100 Y 190 0.512 0.880 2.733 4 + chr12_3635530_3637738 2101 T C chr12 3637630 T 25 0 102 4 13 93 Y 169 0.554 0.024 0.366 4 + +- output file:: + + chr5_30800874_30802049 734 G A 0.352964 + BglII,MboI,Sau3AI,Tru9I,XhoII + 1 CTGAAGGTGAGCAGGATTCAGGAGACAGAAAACAAAGCCCAGGCCTGCCCAAGGTGGAAA + >>>>>>>>>>>>>>>>>>>> + + 61 AGTCTAACAACTCGCCCTCTGCTTAnATCTGAGACTCACAGGGATAATAACACACTTGGT + + + 121 CAAGGAATAAACTAGATATTATTCACTCCTCTAGAAGGCTGCCAGGAAAATTGCCTGACT + <<<<<<< + + 181 TGAACCTTGGCTCTGA + <<<<<<<<<<<<< + etc. + </help> +</tool>
#!/usr/bin/env python
"""find_intervals.py -- run ``sweep`` and, optionally, build a bigWig of its SNPs.

``sweep`` is always run and its standard output is stored in the output
file.  When ``report_snps`` is not "0", that output is parsed into a
bedGraph file and a links file, chromosome sizes are fetched with
``fetchChromSizes``, and ``bedGraphToBigWig`` overwrites the output file
with the resulting bigWig.
"""

import errno
import os
import subprocess
import sys

USAGE = ('usage: find_intervals.py input dbkey output output_files_path '
         'chrom_col pos_col score_col shuffles cutoff report_snps')

################################################################################

def mkdir_p(path):
    """Create *path* and any missing parents; an existing path is not an error."""
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

def _to_text(data):
    """Return *data* as ``str``, decoding it when it is a separate bytes type."""
    if isinstance(data, str):
        return data
    return data.decode('utf-8', 'replace')

def run_program(prog, args, stdout_file=None):
    """Run *args* (with *prog* as the executable, if given) and wait for it.

    When *stdout_file* is given, the program's standard output, with
    trailing line terminators stripped and a single newline appended, is
    written to it.  On a non-zero return code the command line and the
    program's standard error are written to stderr and the script exits
    with status 1.
    """
    p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (stdoutdata, stderrdata) = p.communicate()
    rc = p.returncode

    if stdout_file is not None:
        # communicate() hands back raw bytes, so write them back as bytes.
        with open(stdout_file, 'wb') as ofh:
            ofh.write(stdoutdata.rstrip(b'\r\n') + b'\n')

    if rc != 0:
        sys.stderr.write("FAILED: rc={0}: {1}\n".format(rc, ' '.join(args)))
        sys.stderr.write(_to_text(stderrdata) + '\n')
        sys.exit(1)

def _parse_sweep_output(path):
    """Split sweep's SNP report into ``(data, links_data)``.

    Lines that do not start with a space are interval lines with four
    tab-separated fields (chrom, begin, end, value); each yields a
    ``(chrom, begin, end)`` link.  Lines that start with a space are SNP
    lines with two fields (position, value) belonging to the most recent
    interval line; each yields a ``(chrom, position, value)`` data point.
    """
    data = []
    links_data = []

    with open(path) as fh:
        chrom = None
        for line in fh:
            line = line.rstrip('\r\n')
            if not line:
                continue
            if line[0] != ' ':
                # chrom line, add a link
                chrom, interval_begin, interval_end, interval_value = line.split('\t')
                links_data.append((chrom, int(interval_begin), int(interval_end)))
            else:
                # data line, add a bedgraph line
                begin, value = line.split()
                data.append((chrom, int(begin), value))

    return data, links_data

def _write_bedgraph(path, data):
    """Write the sorted data points to *path* as one-base bedGraph records."""
    with open(path, 'w') as ofh:
        ofh.write('track type=bedGraph\n')
        for chrom, begin, value in sorted(data):
            ofh.write('{0} {1} {2} {3}\n'.format(chrom, begin, begin+1, value))

def _write_links(path, links_data):
    """Write the sorted intervals to *path*, one "chrom begin end" per line."""
    with open(path, 'w') as ofh:
        for chrom, begin, end in sorted(links_data):
            ofh.write('{0} {1} {2}\n'.format(chrom, begin, end))

################################################################################

def main(argv=None):
    """Script entry point; returns the process exit status."""
    if argv is None:
        argv = sys.argv

    if len(argv) != 11:
        sys.stderr.write(USAGE + '\n')
        return 1

    (input_file, dbkey, output, output_files_path, chrom_col, pos_col,
     score_col, shuffles, cutoff, report_snps) = argv[1:11]

    # Note the order: sweep takes the cutoff before the shuffle count.
    args = [ 'sweep', input_file, chrom_col, pos_col, score_col, cutoff, shuffles, report_snps ]
    run_program(None, args, stdout_file=output)

    if report_snps == "0":
        return 0

    mkdir_p(output_files_path)

    # Intermediate files are created in the current working directory.
    bedgraph_filename = 'bedgraph.txt'
    links_filename = os.path.join(output_files_path, 'links.txt')

    data, links_data = _parse_sweep_output(output)
    _write_bedgraph(bedgraph_filename, data)
    _write_links(links_filename, links_data)

    chrom_sizes_filename = '{0}.chrom.sizes'.format(dbkey)
    run_program(None, [ 'fetchChromSizes', dbkey ], stdout_file=chrom_sizes_filename)

    # The bigWig replaces sweep's text output in the output file.
    run_program(None, [ 'bedGraphToBigWig', bedgraph_filename, chrom_sizes_filename, output ])

    return 0

################################################################################

if __name__ == '__main__':
    sys.exit(main())
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/find_intervals.xml Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,100 @@ +<tool id="gd_find_intervals" name="Find remarkable" version="1.0.0"> + <description>genomic intervals</description> + + <command interpreter="python"> + find_intervals.py "$input" "$input.metadata.dbkey" "$output" "$output.extra_files_path" + + #if $override_metadata.choice == "0" + "$input.metadata.ref" "$input.metadata.rPos" + #else + "$override_metadata.ref_col" "$override_metadata.rpos_col" + #end if + + "$score_col" "$shuffles" + + #if $cutoff.type == 'percentage' + "$cutoff.cutoff_pct" + #else + "=$cutoff.cutoff_val" + #end if + + "$out_format" + </command> + + <inputs> + <param name="input" type="data" format="tabular" label="Input"> + <validator type="unspecified_build" message="This dataset does not have a reference species and cannot be used with this tool" /> + </param> + + <param name="score_col" type="data_column" data_ref="input" numerical="true" label="Column with score"/> + + <conditional name="cutoff"> + <param name="type" type="select" label="Cutoff type"> + <option value="percentage">percentage</option> + <option value="value">value</option> + </param> + <when value="percentage"> + <param name="cutoff_pct" type="float" value="95" min="0" max="100" label="Percentage cutoff"/> + </when> + <when value="value"> + <param name="cutoff_val" type="float" value="0.0" label="Value cutoff"/> + </when> + </conditional> + + <param name="shuffles" type="integer" min="0" value="0" label="Number of randomizations"/> + + <param name="out_format" type="select" format="integer" label="Report SNPs"> + <option value="0" selected="true">No</option> + <option value="1">Yes</option> + </param> + + <conditional name="override_metadata"> + <param name="choice" type="select" format="integer" label="Choose columns"> + <option value="0" selected="true">No, get columns from metadata</option> + <option value="1" >Yes, choose columns</option> + </param> + 
<when value="0" /> + <when value="1"> + <param name="ref_col" type="data_column" data_ref="input" numerical="false" label="Column with reference chromosome"/> + <param name="rpos_col" type="data_column" data_ref="input" numerical="true" label="Column with reference position"/> + </when> + </conditional> + </inputs> + + <outputs> + <data name="output" format="interval"> + <change_format> + <when input="out_format" value="1" format="bigwigpos" /> + </change_format> + </data> + </outputs> + + <tests> + <test> + <param name="input" value="genome_diversity/test_in/sample.wsf" ftype="wsf" /> + <param name="score_col" value="5" /> + <param name="type" value="value" /> + <param name="cutoff_val" value="700.0" /> + <param name="shuffles" value="10" /> + <param name="out_format" value="0" /> + <param name="choice" value="0" /> + + <output name="output" file="genome_diversity/test_out/find_intervals/find_intervals.interval" /> + </test> + </tests> + + <help> +**What it does** + +The user selects a SNP table and specifies the columns containing (1) +chromosome, (2) position, (3) scores (such as an Fst-value for the SNP), (4) +a percentage or raw score for the "cutoff" and (5) the number of times the +data should be randomized (only intervals with score exceeding the maximum for +the randomized data are reported). If a percentage (e.g. 95%) is specified +for #4, then that percentile of the scores is used as the cutoff; this may +not work well if many SNPs have the same score. The program subtracts the +cutoff from every score, then finds genomic intervals (i.e., consecutive runs +of SNPs) whose total score cannot be increased by adding or subtracting one +or more SNPs at the ends of the interval. + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gd_composite.py Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,128 @@ +#!/usr/bin/env python + +from galaxy import eggs +import pkg_resources +pkg_resources.require( "Cheetah" ) +from Cheetah.Template import Template + +import errno +import os +from datetime import datetime + +################################################################################ + +def die(message): + print >> sys.stderr, message + sys.exit(1) + +def mkdir_p(path): + try: + os.makedirs(path) + except OSError, e: + if e.errno <> errno.EEXIST: + raise + +################################################################################ + +class Display(object): + def display(self, parameter): + print parameter + +class DisplayFile(Display): + def display(self, parameter): + return '<a href="{0}">{1}</a>'.format(parameter.value, parameter.name) + +class DisplayValue(Display): + def display(self, parameter): + if parameter.value is not None: + return '{0}: {1}'.format(parameter.description, parameter.value) + else: + return '{0}'.format(parameter.description) + +class DisplayTagList(Display): + def display(self, parameter): + rv = [] + if parameter.name: + rv.append(parameter.name) + rv.append('<ol>') + for tag in parameter.value: + col, individual_name = tag.split(':') + rv.append('<li>{0}</li>'.format(individual_name)) + rv.append('</ol>') + return '\n'.join(rv) + +class DisplayPopulationList(Display): + def display(self, parameter): + rv = [] + rv.append('Populations') + rv.append('<ul>') + for population in parameter.value: + rv.append('<li>') + if population.name is not None: + rv.append(population.name) + rv.append('<ol>') + for name in population.individual_names(): + rv.append('<li>{0}</li>'.format(name)) + rv.append('</ol>') + rv.append('</li>') + rv.append('</ul>') + return '\n'.join(rv) + +# def display(self, parameter, name=''): +# print '<ul> {0}'.format(name) +# for individual_name in parameter.individual_names(): +# print 
'<li>{0}>/li>'.format(individual_name) +# print '</ul>' + + +class Parameter(object): + def __init__(self, name=None, value=None, description=None, display_type=None): + self.name = name + self.value = value + self.description = description + if display_type is None: + self.display_type = Display() + else: + self.display_type = display_type + + def display(self): + return self.display_type.display(self) + +class InfoPage(object): + _realpath = os.path.realpath(__file__) + _script_dir = os.path.dirname(_realpath) + template_file = os.path.join(_script_dir, 'gd_composite_template.html') + def __init__(self): + self.timestamp = datetime.now().strftime('%Y-%m-%d %I:%M:%S %p') + self.title = 'Genome Diversity Composite Dataset' + self.inputs = [] + self.outputs = [] + self.misc = '' + self.template = self.load_template() + + def load_template(self): + with open(self.template_file) as f: + return f.read().rstrip('\r\n') + + def set_title(self, title): + self.title = title + + def add_input_parameter(self, parameter): + self.inputs.append(parameter) + + def add_output_parameter(self, parameter): + self.outputs.append(parameter) + + def add_misc(self, misc): + self.misc = misc + + def render(self): + return Template(self.template, searchList=[{'tool': self}]) + + + + + + + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gd_composite_template.html Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,40 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> + <head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8" /> + <link rel="stylesheet" href="/static/style/base.css" type="text/css" /> + <title>${tool.title}</title> + </head> + <body> + <div class="document"> + Output completed: $tool.timestamp + <p/> + #if $tool.outputs + <div id="gd_outputs"> + Outputs + <ul> + #for output in $tool.outputs + <li>${output.display()}</li> + #end for + </ul> + </div> + #end if + #if $tool.inputs + <div id="gd_inputs"> + Inputs + <ul> + #for input in $tool.inputs + <li>${input.display()}</li> + #end for + </ul> + </div> + #end if + #if $tool.misc + <div id="gd_misc"> + $tool.misc.display() + </div> + #end if + </div> + </body> +</html>
#!/usr/bin/env python2.5
"""genome_diversity.py -- readers for the files used by the Genome Diversity tools.

Provides a tab-separated SNP table reader (SnpFile), cdb-indexed primers and
snpcalls readers (PrimersFile, SnpcallsFile), and simple key/value loaders
for location files (LocationFile) and chromosome length files (ChrLens).

The syntax is kept compatible with Python 2.5 (see the interpreter line):
no ``with`` statement and no ``except ... as`` clauses.
"""

import sys
import cdblib

def _openfile( filename=None, mode='r' ):
    """Open filename and return the file object; IOError becomes RuntimeError."""
    try:
        fh = open( filename, mode )
    except IOError:
        # sys.exc_info() instead of "except ... as", which Python 2.5 lacks.
        err = sys.exc_info()[1]
        raise RuntimeError( "can't open file: %s\n" % str( err ) )
    return fh

def get_filename_from_loc( species=None, filename=None ):
    """Return column 2 of the first line in filename whose column 1 is species.

    Lines starting with '#' and empty lines are skipped; columns are
    tab-separated.  Raises RuntimeError if the file cannot be opened or if no
    line matches.
    """
    fh = _openfile( filename )
    try:
        for line in fh:
            if line and not line.startswith( '#' ):
                line = line.rstrip( '\r\n' )
                if line:
                    elems = line.split( '\t' )
                    if len( elems ) >= 2 and elems[0] == species:
                        return elems[1]
    finally:
        # Release the handle on every path, including the early return.
        fh.close()

    raise RuntimeError( "can't find '%s' in location file: %s\n" % ( species, filename ) )


class SnpFile( object ):
    """Sequential reader for a tab-separated SNP table.

    Column arguments are 1-based.  Lines starting with '#' are collected in
    self.comments instead of being returned as data.
    """

    def __init__( self, filename=None, seq_col=1, pos_col=2, ref_seq_col=7, ref_pos_col=8 ):
        self.filename = filename
        self.fh = _openfile( filename )
        self.seq_col = seq_col
        self.pos_col = pos_col
        self.ref_seq_col = ref_seq_col
        self.ref_pos_col = ref_pos_col
        self.elems = None
        self.line = None
        self.comments = []

    def next( self ):
        """Advance to the next data line.

        Returns 1 after storing the split fields in self.elems, or None at end
        of file (self.line and self.elems are then reset to None).
        """
        while self.fh:
            try:
                self.line = self.fh.next()
            except StopIteration:
                self.line = None
                self.elems = None
                return None
            if self.line:
                self.line = self.line.rstrip( '\r\n' )
                if self.line:
                    if self.line.startswith( '#' ):
                        self.comments.append( self.line )
                    else:
                        self.elems = self.line.split( '\t' )
                        return 1

    def get_seq_pos( self ):
        """Return (sequence, position) of the current line, or (None, None)."""
        if self.elems:
            return self.elems[ self.seq_col - 1 ], self.elems[ self.pos_col - 1 ]
        else:
            return None, None

    def get_ref_seq_pos( self ):
        """Return (reference sequence, reference position), or (None, None)."""
        if self.elems:
            # Was self.ref_seq_seq, an attribute that is never set.
            return self.elems[ self.ref_seq_col - 1 ], self.elems[ self.ref_pos_col - 1 ]
        else:
            return None, None


class IndexedFile( object ):
    """A data file whose line offsets are looked up in a cdb index file."""

    def __init__( self, data_file=None, index_file=None ):
        self.data_file = data_file
        self.index_file = index_file
        self.data_fh = _openfile( data_file )
        # The index content is handed to cdblib as raw data, so read it in
        # binary mode.
        self.index_fh = _openfile( index_file, 'rb' )
        self._reader = cdblib.Reader( self.index_fh.read(), hash )

    def get_indexed_line( self, key=None ):
        """Return the data line stored at the offset indexed by key.

        Returns None if key is not in the index.  Raises RuntimeError if the
        offset points past the end of the data file.  The data file is left
        positioned just after the returned line.
        """
        line = None
        if key in self._reader:
            offset = self._reader.getint( key )
            self.data_fh.seek( offset )
            try:
                line = self.data_fh.next()
            except StopIteration:
                raise RuntimeError( 'index file out of sync for %s' % key )
        return line

class PrimersFile( IndexedFile ):
    """Indexed primers file made of '>' header lines followed by entry lines."""

    def get_primer_header( self, sequence=None, position=None ):
        """Return the validated '>' header line for sequence/position.

        Returns None (falsy) when the key is not indexed.  Raises RuntimeError
        if the indexed line is not a header, is too short, or names a
        different sequence/position than requested.
        """
        key = "%s %s" % ( str( sequence ), str( position ) )
        header = self.get_indexed_line( key )
        if header:
            if header.startswith( '>' ):
                elems = header.split()
                if len( elems ) < 3:
                    raise RuntimeError( 'short primers header for %s' % key )
                if sequence != elems[1] or str( position ) != elems[2]:
                    raise RuntimeError( 'primers index for %s finds %s %s' % ( key, elems[1], elems[2] ) )
            else:
                raise RuntimeError( 'primers index out of sync for %s' % key )
        return header

    def get_entry( self, sequence=None, position=None ):
        """Return the header plus all following lines up to the next header."""
        entry = self.get_primer_header( sequence, position )
        if entry:
            while self.data_fh:
                try:
                    line = self.data_fh.next()
                except StopIteration:
                    break
                if line.startswith( '>' ):
                    break
                entry += line
        return entry

    def get_enzymes( self, sequence=None, position=None ):
        """Return the enzymes listed on the line that follows the header.

        The line is comma-separated; empty items are dropped.  Returns an
        empty list when the key is not indexed.  Raises RuntimeError if the
        entry has no line after its header.
        """
        entry = self.get_primer_header( sequence, position )
        enzyme_list = []
        if entry:
            try:
                line = self.data_fh.next()
            except StopIteration:
                raise RuntimeError( 'primers entry for %s %s is truncated' % ( str( sequence ), str( position ) ) )
            if line.startswith( '>' ):
                raise RuntimeError( 'primers entry for %s %s is truncated' % ( str( sequence ), str( position ) ) )
            # The stripped result used to be thrown away.
            line = line.rstrip( '\r\n' )
            if line:
                enzymes = line.split( ',' )
                for enzyme in enzymes:
                    enzyme = enzyme.strip()
                    if enzyme:
                        enzyme_list.append( enzyme )
        return enzyme_list

class SnpcallsFile( IndexedFile ):
    """Indexed tab-separated file: sequence, position, SNP sequence, ..."""

    def get_snp_seq( self, sequence=None, position=None ):
        """Return the third column of the indexed line, or None if not indexed.

        Raises RuntimeError if the line has fewer than three columns or does
        not match the requested sequence/position.
        """
        key = "%s %s" % ( str( sequence ), str( position ) )
        line = self.get_indexed_line( key )
        if line:
            elems = line.split( '\t' )
            if len (elems) < 3:
                raise RuntimeError( 'short snpcalls line for %s' % key )
            if sequence != elems[0] or str( position ) != elems[1]:
                raise RuntimeError( 'snpcalls index for %s finds %s %s' % ( key, elems[0], elems[1] ) )
            return elems[2]
        else:
            return None

    def get_flanking_dna( self, sequence=None, position=None, format='fasta' ):
        """Return the flanking DNA of a SNP as a 'fasta' or 'primer3' record.

        The bracketed part of the SNP sequence is replaced by a single 'n'.
        Returns None when the SNP is not indexed.  Raises RuntimeError for an
        unknown format or when the sequence lacks '[' or ']'.
        """
        if format != 'fasta' and format != 'primer3':
            raise RuntimeError( 'invalid format for flanking dna: %s' % str( format ) )
        seq = self.get_snp_seq( sequence, position )
        if seq:
            p = seq.find('[')
            if p == -1:
                raise RuntimeError( 'snpcalls entry for %s %s missing left bracket: %s' % ( str( sequence ), str( position ), seq ) )
            q = seq.find(']', p + 1)
            if q == -1:
                raise RuntimeError( 'snpcalls entry for %s %s missing right bracket: %s' % ( str( sequence ), str( position ), seq ) )
            q += 1

            if format == 'fasta':
                flanking_seq = '> '
            else:
                flanking_seq = 'SEQUENCE_ID='

            # Reports the characters at offsets 1 and 3 inside the bracket;
            # presumably the bracket reads "[A/B]" -- TODO confirm.
            flanking_seq += "%s %s %s %s\n" % ( str( sequence ), str( position ), seq[p+1], seq[p+3] )

            if format == 'primer3':
                flanking_seq += 'SEQUENCE_TEMPLATE='

            flanking_seq += "%sn%s\n" % ( seq[0:p], seq[q:] )

            if format == 'primer3':
                flanking_seq += "SEQUENCE_TARGET=%d,11\n=\n" % ( p - 5 )

            return flanking_seq
        else:
            return None



class LocationFile( object ):
    """Key/value map loaded from a tab-separated location file.

    Problems opening the file or looking up a key are reported on stderr and
    terminate the process with exit status 1.
    """

    def __init__(self, filename):
        self.build_map(filename)

    def build_map(self, filename):
        """Load self.map from filename; lines without a tab are ignored."""
        self.map = {}
        self.open_file(filename)
        try:
            for line in self.read_lines():
                elems = line.split('\t', 1)
                if len(elems) == 2:
                    self.map[ elems[0].strip() ] = elems[1].strip()
        finally:
            self.close_file()

    def read_lines(self):
        """Yield each non-comment line with its line ending removed."""
        for line in self.fh:
            if not line.startswith('#'):
                line = line.rstrip('\r\n')
                yield line

    def open_file(self, filename):
        self.filename = filename
        try:
            self.fh = open(filename, 'r')
        except IOError:
            err = sys.exc_info()[1]
            sys.stderr.write("Error opening location file '%s': %s\n" % (filename, str(err)))
            sys.exit(1)

    def close_file(self):
        self.fh.close()

    def loc_file( self, key ):
        """Return the value stored for key, or exit(1) if key is unknown."""
        if key in self.map:
            return self.map[key]
        else:
            sys.stderr.write("'%s' does not appear in location file '%s'\n" % (key, self.filename))
            sys.exit(1)

class ChrLens( object ):
    """Chromosome name -> length map loaded from a tab-separated file."""

    def __init__( self, chrlen_filename ):
        self.chrlen_filename = chrlen_filename
        self.build_map()

    def build_map(self):
        """Load self.map; entries whose length is not an integer are skipped."""
        self.map = {}
        self.open_file(self.chrlen_filename)
        try:
            for line in self.read_lines():
                elems = line.split('\t', 1)
                if len(elems) == 2:
                    chrom = elems[0].strip()
                    chrom_len_text = elems[1].strip()
                    try:
                        chrom_len = int( chrom_len_text )
                    except ValueError:
                        sys.stderr.write("Bad length '%s' for chromosome '%s' in '%s'\n" % (chrom_len_text, chrom, self.chrlen_filename))
                        # Skip the entry: chrom_len would otherwise be
                        # undefined or left over from the previous line.
                        continue
                    self.map[ chrom ] = chrom_len
        finally:
            self.close_file()

    def read_lines(self):
        """Yield each non-comment line with its line ending removed."""
        for line in self.fh:
            if not line.startswith('#'):
                line = line.rstrip('\r\n')
                yield line

    def open_file(self, filename):
        self.filename = filename
        try:
            self.fh = open(filename, 'r')
        except IOError:
            err = sys.exc_info()[1]
            sys.stderr.write("Error opening chromosome length file '%s': %s\n" % (filename, str(err)))
            sys.exit(1)

    def close_file(self):
        self.fh.close()

    def length( self, key ):
        """Return the length recorded for chromosome key, or None if unknown."""
        if key in self.map:
            return self.map[key]
        else:
            return None

    def __iter__( self ):
        """Iterate over the chromosome names."""
        for chrom in self.map:
            yield chrom
"""
SnpFile datatype

Galaxy datatypes for the Genome Diversity tools: tabular files whose leading
'#' comment block holds a JSON document that is copied into dataset metadata.
"""

import data
import tempfile
import os
import simplejson
from galaxy import util
from galaxy.datatypes.sniff import *
from galaxy.datatypes.tabular import Tabular
from galaxy.datatypes.images import Html
from galaxy.datatypes.binary import BigWig
from galaxy.datatypes import metadata
from galaxy.datatypes.metadata import MetadataElement

class BigWigPos(BigWig):
    """BigWig variant registered under the name 'BigWigPos'."""
    def __init__( self, **kwd ):
        BigWig.__init__( self, **kwd )
        self._name = "BigWigPos"

class Wped( Html ):
    """Composite HTML dataset bundling a pedigree (.ped) and a map (.map) file."""
    allow_datatype_change = False
    composite_type = 'basic'
    file_ext = 'wped'

    MetadataElement( name="base_name", desc="base name for all transformed versions of this genetic dataset", default='WpedData', readonly=True, set_in_upload=True )

    def __init__( self, **kwd ):
        Html.__init__( self, **kwd )
        self.add_composite_file( '%s.ped', description = 'Pedigree File', substitute_name_with_metadata = 'base_name', is_binary = False )
        self.add_composite_file( '%s.map', description = 'Map File', substitute_name_with_metadata = 'base_name', is_binary = False )

class Individuals( Tabular ):
    """Tabular list of individuals with fixed Column/Name/Alias headings."""
    file_ext = 'ind'
    def __init__(self, **kwd):
        Tabular.__init__( self, **kwd )
        self.column_names = [ 'Column', 'Name', 'Alias' ]

    def display_peek( self, dataset ):
        return Tabular.make_html_table( self, dataset, column_names=self.column_names )

class DatasetComments( object ):
    """Collects the leading comment lines of a dataset, prefix removed."""

    def __init__( self, dataset, comment_string='#' ):
        self.dataset = dataset
        self.comment_string = comment_string
        self.comment_string_len = len(comment_string)
        self._comments = []
        self._read_comments()

    def _read_comments( self ):
        """Read comment lines from the top of the file up to the first data line.

        Best effort: if the file cannot be opened or read, the comments
        gathered so far are kept and no exception is raised.
        """
        if not self.dataset.has_data():
            return
        try:
            fh = open( self.dataset.file_name, 'rU' )
        except EnvironmentError:
            return
        try:
            for line in fh:
                if not line.startswith( self.comment_string ):
                    break
                self._comments.append( line[self.comment_string_len:] )
        except EnvironmentError:
            pass
        finally:
            fh.close()

    def __str__( self ):
        return "".join(self._comments)

    @property
    def comments( self ):
        return self._comments

class DatasetCommentMetadata( object ):
    """Decodes a dataset's leading comment block as a JSON object."""

    def __init__( self, dataset, comment_string='#' ):
        self.dataset_comments = DatasetComments( dataset, comment_string )
        self._comment_metadata = {}
        self._decode_dataset_comments()

    def _decode_dataset_comments( self ):
        """Parse the comment block; leave the metadata empty if it is unusable."""
        dataset_comment_string = str( self.dataset_comments )
        try:
            decoded = simplejson.loads( dataset_comment_string )
        except ValueError:
            # simplejson.JSONDecodeError subclasses ValueError, and releases
            # that predate it raise ValueError directly.
            return
        # Only a JSON object is usable: callers rely on .copy() and .get().
        if isinstance( decoded, dict ):
            self._comment_metadata = decoded

    @property
    def comment_metadata( self ):
        return self._comment_metadata

class AnnotatedTabular( Tabular ):
    """ Tabular file with optional comment block containing JSON to be imported into metadata """
    MetadataElement( name="comment_metadata", desc="comment metadata", param=metadata.DictParameter, visible=False, readonly=True )

    def set_meta( self, dataset, overwrite = True, **kwd ):
        """Set the Tabular metadata, then import metadata from the comment block.

        The comment block is decoded only while comment_metadata is still None.
        """
        Tabular.set_meta( self, dataset, overwrite=overwrite, max_data_lines=None, max_guess_type_data_lines=1000, **kwd )
        if dataset.metadata.comment_metadata is None:
            dataset_comment_metadata = DatasetCommentMetadata( dataset )
            dataset.metadata.comment_metadata = dataset_comment_metadata.comment_metadata.copy()
        self.set_dataset_metadata_from_comments( dataset )

    def set_dataset_metadata_from_comments( self, dataset ):
        """Hook for subclasses; the base class imports nothing."""
        pass

    def set_peek( self, dataset, line_count=None, is_multi_byte=False ):
        # NOTE(review): super(Tabular, self) skips Tabular.set_peek and calls
        # the implementation of Tabular's own base class; presumably that is
        # intentional -- confirm before changing.
        super(Tabular, self).set_peek( dataset, line_count=line_count, is_multi_byte=is_multi_byte, WIDTH='unlimited', skipchars=['#'] )

    def display_peek( self, dataset ):
        """Returns formated html of peek"""
        return Tabular.make_html_table( self, dataset, skipchars=['#'] )

class Fake( AnnotatedTabular ):
    """Annotated table with scaffold/pos/ref/rPos column and species metadata."""
    MetadataElement( name="scaffold", desc="scaffold column", param=metadata.ColumnParameter, default=0 )
    MetadataElement( name="pos", desc="pos column", param=metadata.ColumnParameter, default=0 )
    MetadataElement( name="ref", desc="ref column", param=metadata.ColumnParameter, default=0 )
    MetadataElement( name="rPos", desc="rPos column", param=metadata.ColumnParameter, default=0 )
    MetadataElement( name="species", desc="species", default='', no_value='', visible=False, readonly=True )

    def set_dataset_metadata_from_comments( self, dataset ):
        self.set_dataset_column_names_metadata( dataset )
        self.set_dataset_columnParameter_metadata( dataset )
        self.set_dataset_species_metadata( dataset )
        self.set_dataset_dbkey_metadata( dataset )

    def set_dataset_column_names_metadata( self, dataset ):
        """Copy the 'column_names' list from the comment metadata, if present."""
        value_from_comment_metadata = dataset.metadata.comment_metadata.get( 'column_names', None )
        if isinstance( value_from_comment_metadata, list ):
            dataset.metadata.column_names = value_from_comment_metadata[:]

    def set_dataset_columnParameter_metadata( self, dataset ):
        """Set every ColumnParameter element that the comment metadata names.

        A value that is not an integer counts as 0; values outside
        0..number-of-columns are ignored.
        """
        for name, spec in dataset.metadata.spec.items():
            if isinstance( spec.param, metadata.ColumnParameter ):
                value_from_comment_metadata = dataset.metadata.comment_metadata.get( name, None )
                if value_from_comment_metadata is not None:
                    try:
                        i = int( value_from_comment_metadata )
                    except ( TypeError, ValueError ):
                        i = 0
                    if 0 <= i <= dataset.metadata.columns:
                        setattr( dataset.metadata, name, i )

    def set_dataset_species_metadata( self, dataset ):
        value_from_comment_metadata = dataset.metadata.comment_metadata.get( 'species', None )
        if isinstance( value_from_comment_metadata, basestring ):
            dataset.metadata.species = value_from_comment_metadata

    def set_dataset_dbkey_metadata( self, dataset ):
        """Set dbkey from the comment metadata, defaulting to '?'."""
        value_from_comment_metadata = dataset.metadata.comment_metadata.get( 'dbkey', '?' )
        if isinstance( value_from_comment_metadata, basestring ):
            dataset.metadata.dbkey = value_from_comment_metadata

class SnpFile( Fake ):
    """ Webb's SNP file format """
    file_ext = 'wsf'

    MetadataElement( name="individual_names", desc="individual names", visible=False, readonly=True )
    MetadataElement( name="individual_columns", desc="individual columns", visible=False, readonly=True )

    def set_dataset_metadata_from_comments( self, dataset ):
        Fake.set_dataset_metadata_from_comments( self, dataset )
        self.set_dataset_individual_metadata( dataset )

    def set_dataset_individual_metadata( self, dataset ):
        """Import the 'individuals' list of [name, column] pairs.

        Entries that are not two-item lists, or whose column is not an integer
        in 1..number-of-columns, are dropped.  A name that is not a string is
        replaced by ''.  Metadata is written only if an individual remains.
        """
        individual_list = dataset.metadata.comment_metadata.get( 'individuals', None )
        if not isinstance( individual_list, list ):
            individual_list = []

        individual_names = []
        individual_columns = []

        for individual in individual_list:
            if not isinstance( individual, list ) or len( individual ) != 2:
                continue
            name, col = individual
            if not isinstance( name, basestring ):
                name = ''
            try:
                c = int( col )
            except ( TypeError, ValueError ):
                c = 0
            if 0 < c <= dataset.metadata.columns:
                individual_names.append( name )
                individual_columns.append( c )

        if individual_names:
            dataset.metadata.individual_names = individual_names[:]
            dataset.metadata.individual_columns = individual_columns[:]

class SapFile( Fake ):
    """ Webb's SAP file format """
    file_ext = 'wpf'

    MetadataElement( name="kegg_gene", desc="KEGG gene code column", param=metadata.ColumnParameter, default=0 )
    MetadataElement( name="kegg_path", desc="KEGG pathway code/name column", param=metadata.ColumnParameter, default=0 )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mkpthwpng.py Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,92 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# mkpthwpng.py +# +# Copyright 2011 Oscar Bedoya-Reina <oscar@niska.bx.psu.edu> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301, USA. + +import argparse,mechanize,os,sys + +#this return an image made up from a list of genes and pathway code +def rtnHTMLformat(tmpddGenrcgenPresent,sppPrefx,pthwcod,ouPthwpng): + inpx='\n'.join(tmpddGenrcgenPresent)#inpx="ALDH2 color \nALDH3A1 color" + request=mechanize.Request("http://www.genome.jp/kegg/tool/map_pathway2.html") + response = mechanize.urlopen(request) + forms = mechanize.ParseResponse(response, backwards_compat=False) + form=forms[0] + form["unclassified"]=inpx + form["org_name"]=[sppPrefx] + request2 = form.click() + response2 = mechanize.urlopen(request2) + a=str(response2.read()).split('href="/kegg-bin/show_pathway?')[1] + code=a.split('/')[0]#response2.read() + request=mechanize.Request("http://www.genome.jp/kegg-bin/show_pathway?%s/%s.args"%(code,pthwcod))#request=mechanize.Request("http://www.genome.jp/kegg-bin/show_pathway?%s/%s.args"%('13171478854246','hsa00410')) + response = mechanize.urlopen(request) + forms = mechanize.ParseResponse(response, backwards_compat=False) + form=forms[1] + 
status=' NOT ' + try: + imgf=str(forms[1]).split('/mark_pathway')[1].split('/')[0] + os.system("wget --quiet http://www.genome.jp/tmp/mark_pathway%s/%s.png -O %s"%(imgf,pthwcod,ouPthwpng)) + status=' ' + except: + pass + return 'A pathway image was%ssuccefully produced...'%status + + +def main(): + parser = argparse.ArgumentParser(description='Obtain KEGG images from a list of genes.') + parser.add_argument('--input',metavar='input TXT file',type=str,help='the input file with the table in txt format') + parser.add_argument('--output',metavar='output PNG image',type=str,help='the output image file in png format') + parser.add_argument('--KEGGpath',metavar='KEGG pathway code (i.e. cfa00230)',type=str,help='the code of the pathway of interest') + parser.add_argument('--posKEGGclmn',metavar='column number',type=int,help='the column with the KEGG pathway code/name') + parser.add_argument('--KEGGgeneposcolmn',metavar='column number',type=int,help='column with the KEGG gene code') + #~Open arguments + class C(object): + pass + fulargs=C() + parser.parse_args(sys.argv[1:],namespace=fulargs) + #test input vars + inputf,outputf,KEGGpathw,posKEGGclmn,Kgeneposcolmn=fulargs.input,fulargs.output,fulargs.KEGGpath,fulargs.posKEGGclmn,fulargs.KEGGgeneposcolmn + # make posKEGGclmn, Kgeneposcolmn 0-based + sppPrefx= KEGGpathw[:3] + posKEGGclmn -= 1 + Kgeneposcolmn -= 1 + #make a dictionary of valid genes + dKEGGcPthws=dict([(x.split('\t')[Kgeneposcolmn],set([y.split('=')[0] for y in x.split('\t')[posKEGGclmn].split('.')])) for x in open(inputf).read().splitlines()[1:] if x.strip()]) + for mt1gene in [x for x in dKEGGcPthws.keys() if x.find('.')>-1]:#to crrect names with more than one gene + pthwsAssotd=dKEGGcPthws.pop(mt1gene) + for eachg in mt1gene.split('.'): + dKEGGcPthws[eachg]=pthwsAssotd + tmpddGenrcgenPresent=set() + sKEGGc=dKEGGcPthws.keys() + lsKEGGc=len(sKEGGc) + ctPthw=0 + while ctPthw < lsKEGGc:#to save memory + eachK=sKEGGc.pop() + alPthws=dKEGGcPthws[eachK] + if 
KEGGpathw in alPthws: + tmpddGenrcgenPresent.add('\t'.join([eachK,'red'])) + ctPthw+=1 + #run the program + rtnHTMLformat(tmpddGenrcgenPresent,sppPrefx,KEGGpathw,outputf) + return 0 + + +if __name__ == '__main__': + main()
#!/usr/bin/env python
"""modify_snp_table.py -- run the external 'pop' program on a SNP table.

Checks that every individual of the population file appears among the
individual tags passed on the command line, then runs 'pop' with the
population's columns and captures its standard output in the output file.
"""

import sys
import subprocess
from Population import Population

################################################################################

if len(sys.argv) < 9:
    sys.stderr.write("Usage: modify_snp_table.py input p1_input output lo hi lo_ind lo_ind_qual tag [tag ...]\n")
    sys.exit(1)

input, p1_input, output, lo, hi, lo_ind, lo_ind_qual = sys.argv[1:8]
individual_metadata = sys.argv[8:]

# All individuals known to the SNP table, built from the remaining arguments.
p_total = Population()
p_total.from_tag_list(individual_metadata)

# The population the user selected.
p1 = Population()
p1.from_population_file(p1_input)

if not p_total.is_superset(p1):
    sys.stderr.write('There is an individual in the population that is not in the SNP table\n')
    sys.exit(1)

################################################################################

prog = 'pop'

args = [ prog, input, lo, hi, lo_ind, lo_ind_qual ]

columns = p1.column_list()

for column in sorted(columns):
    args.append(column)

fh = open(output, 'w')
try:
    #print "args:", ' '.join(args)
    p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=fh, stderr=sys.stderr)
    rc = p.wait()
finally:
    # Close the output file even if the program could not be started.
    fh.close()

# Propagate a failure of 'pop' instead of always reporting success.
if rc != 0:
    sys.stderr.write("FAILED: rc=%s: %s\n" % (rc, ' '.join(args)))
    sys.exit(1)

sys.exit(0)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/modify_snp_table.xml Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,67 @@ +<tool id="gd_modify_snp_table" name="Modify" version="1.0.0"> + <description>a SNP table</description> + + <command interpreter="python"> + modify_snp_table.py "$input" "$p1_input" "$output" + #if $limit_coverage.choice == "0" + "-1" "-1" "-1" "-1" + #else + "${limit_coverage.lo_coverage}" "${limit_coverage.hi_coverage}" "${limit_coverage.low_ind_cov}" "${limit_coverage.lo_quality}" + #end if + #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns) + #set $arg = '%s:%s' % ($individual_col, $individual) + "$arg" + #end for + </command> + + <inputs> + <param name="input" type="data" format="wsf" label="SNP table" /> + <param name="p1_input" type="data" format="ind" label="Population individuals" /> + <conditional name="limit_coverage"> + <param name="choice" type="select" format="integer" label="Option"> + <option value="0" selected="true">add columns to the SNP table</option> + <option value="1">discard some SNPs</option> + </param> + <when value="0" /> + <when value="1"> + <param name="lo_coverage" type="integer" min="0" value="0" label="Lower bound on total coverage" /> + <param name="hi_coverage" type="integer" min="0" value="1000" label="Upper bound on total coverage" /> + <param name="low_ind_cov" type="integer" min="0" value="0" label="Lower bound on individual coverage" /> + <param name="lo_quality" type="integer" min="0" value="0" label="Lower bound on individual quality values" /> + </when> + </conditional> + </inputs> + + <outputs> + <data name="output" format="wsf" metadata_source="input" /> + </outputs> + + <tests> + <test> + <param name="input" value="genome_diversity/test_in/sample.wsf" ftype="wsf" /> + <param name="p1_input" value="genome_diversity/test_in/a.ind" ftype="ind" /> + <param name="choice" value="1" /> + <param name="lo_coverage" value="0" /> + <param 
name="hi_coverage" value="1000" /> + <param name="low_ind_cov" value="3" /> + <param name="lo_quality" value="30" /> + <output name="output" file="genome_diversity/test_out/modify_snp_table/modify.wsf" /> + </test> + </tests> + + <help> +**What it does** + +The user specifies that some of the individuals in the selected SNP table +form a "population" that has been previously defined using the Galaxy tool to +select individuals from a SNP table. One option is for the program to append +four columns to the table, giving the total counts for the two alleles, the +"genotype" for the population and the maximum quality value, taken over all +individuals in the population. If all defined genotypes in the population +are 2 (agree with the reference), the population's genotype is 2; similarly +for 0; otherwise the genotype is 1 (unless all individuals have undefined +genotype, in which case it is -1). The other option is to remove rows from +the table for which the total coverage for the population is either too low +or too high, and/or if the individual coverage or quality value is too low. + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pathway_image.xml Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,46 @@ +<tool id="gd_pathway_image" name="Generate" version="1.0.0"> + <description>KEGG pathway images</description> + + <command interpreter="python"> + mkpthwpng.py + "--input=${input}" + "--output=${output}" + "--KEGGpath=${pathway}" + "--posKEGGclmn=${input.metadata.kegg_path}" + "--KEGGgeneposcolmn=${input.metadata.kegg_gene}" + </command> + + <inputs> + <param name="input" type="data" format="wpf" label="Table"> + <validator type="metadata" check="kegg_gene,kegg_path" message="Missing KEGG gene code column and/or KEGG pathway code/name column metadata. Click the pencil icon in the history item to edit/save the metadata attributes" /> + </param> + <param name="pathway" type="select"> + <options from_file="gd.pathways.txt"> + <column name="value" index="1"/> + <column name="name" index="2"/> + <filter type="data_meta" ref="input" key="dbkey" column="0" separator="\t" /> + </options> + </param> + </inputs> + + <outputs> + <data name="output" format="png" /> + </outputs> + + <tests> + <test> + <param name="input" value="genome_diversity/test_in/sample.wpf" ftype="wpf" /> + <param name="pathway" value="cfa05214" /> + <output name="output" file="genome_diversity/test_out/pathway_image/pathway_image.png" compare="sim_size" delta = "10000" /> + </test> + </tests> + + <help> +**What it does** + +This tool produces an image of an input KEGG pathway, highlighting the +modules representing genes in an input list. NOTE: a given gene can +be assigned to multiple modules, and different genes can be assigned to +the same module. + </help> +</tool>
#!/usr/bin/env python
# pca.py
"""Run a smartpca principal component analysis and build its Galaxy info page.

Usage: pca.py <input_html> <input_files_path> <output_html> <output_files_path>

The script converts ``admix.ped`` / ``admix.map`` found in
``input_files_path`` into the geno/snp/ind files that smartpca reads, runs
``smartpca``, ``gd_ploteig``, ``eval2pct`` and ``coords2admix``, and writes
an HTML info page describing the results to ``output_html``.
"""

from __future__ import print_function

import errno
import os
import shutil
import subprocess
import sys
from BeautifulSoup import BeautifulSoup
import gd_composite

################################################################################

def mkdir_p(path):
    """Create ``path`` and any missing parents; an existing path is not an error."""
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

################################################################################

def _report_failure(rc, args, stderrdata=None):
    """Print the failed command (and its captured stderr, if any) and exit 1."""
    print("FAILED: rc={0}: {1}".format(rc, ' '.join(args)), file=sys.stderr)
    # stderrdata is None when the child's stderr was not piped; printing it
    # would only emit the literal text "None".
    if stderrdata is not None:
        print(stderrdata, file=sys.stderr)
    sys.exit(1)

def _run_to_file(args, stdout_path):
    """Run ``args`` with stdout redirected to ``stdout_path``; exit 1 on failure."""
    with open(stdout_path, 'w') as ofh:
        p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=ofh, stderr=subprocess.PIPE)
        (stdoutdata, stderrdata) = p.communicate()

    if p.returncode != 0:
        _report_failure(p.returncode, args, stderrdata)

def run_program(prog, args, stdout_file=None):
    """Run ``args`` (with ``prog`` as the executable, if given) and capture output.

    When ``stdout_file`` is given, the captured stdout is written to it.
    A non-zero exit status is reported on stderr and terminates the script.
    """
    #print "args: ", ' '.join(args)
    p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (stdoutdata, stderrdata) = p.communicate()
    rc = p.returncode

    if stdout_file is not None:
        with open(stdout_file, 'w') as ofh:
            print(stdoutdata, file=ofh)

    if rc != 0:
        _report_failure(rc, args, stderrdata)

################################################################################

def do_ped2geno(input, output):
    """Convert the ped file ``input`` into the geno file ``output``.

    Allele columns start at field 6 and come in pairs; each pair is mapped
    through ``pair_map`` to a single genotype digit.  One output line is
    written per allele pair, holding one digit per input row.
    """
    rows = []
    with open(input) as fh:
        for line in fh:
            fields = line.split()
            # A blank line has no allele columns and would raise IndexError below.
            if fields:
                rows.append(fields)

    pair_map = {
        '0':{ '0':'9', '1':'9', '2':'9' },
        '1':{ '0':'1', '1':'2', '2':'1' },
        '2':{ '0':'1', '1':'1', '2':'0' }
    }

    # An input without data rows produces an empty geno file instead of an
    # IndexError on rows[0].
    width = len(rows[0]) if rows else 0

    with open(output, 'w') as ofh:
        for a_idx in range(6, width, 2):
            b_idx = a_idx + 1
            print(''.join(pair_map[row[a_idx]][row[b_idx]] for row in rows), file=ofh)

def do_map2snp(input, output):
    """Write one snp line to ``output`` for every entry of the map file ``input``.

    Only the second field of each map line is used; the remaining snp
    columns are constants.
    """
    with open(output, 'w') as ofh:
        with open(input) as fh:
            for line in fh:
                elems = line.split()
                if len(elems) < 2:
                    # blank or truncated line: no second field to report
                    continue
                # NOTE(review): the column spacing of this literal was taken from
                # a whitespace-collapsed copy of the file -- confirm against the
                # repository if the exact spacing matters.
                print(' {0} 11 0.002 2000 A T'.format(elems[1]), file=ofh)

def make_ind_file(ind_file, input):
    """Write the ind file from the population list in the HTML file ``input``.

    The first ``<ul>`` inside ``<div id="gd_misc">`` is read.  Every
    odd-indexed child of that list is treated as a population entry: its
    first content node is the population name and the ``<li>`` items of its
    ``<ol>`` are the individuals.  Returns the population names, with
    spaces replaced by underscores, in document order.
    """
    pops = []

    with open(input) as fh:
        soup = BeautifulSoup(fh)
        misc = soup.find('div', {'id': 'gd_misc'})
        populations = misc('ul')[0]

    # 'with' closes the output even if an entry cannot be parsed
    with open(ind_file, 'w') as ofh:
        for i, entry in enumerate(populations):
            if i % 2 == 1:
                population_name = entry.contents[0].encode('utf8').strip().replace(' ', '_')
                pops.append(population_name)
                individuals = entry.ol('li')
                for individual in individuals:
                    individual_name = individual.string.encode('utf8').strip()
                    print(individual_name, 'M', population_name, file=ofh)

    return pops

def make_par_file(par_file, geno_file, snp_file, ind_file, evec_file, eval_file):
    """Write the smartpca parameter file ``par_file`` naming the given files."""
    with open(par_file, 'w') as fh:
        print('genotypename: {0}'.format(geno_file), file=fh)
        print('snpname: {0}'.format(snp_file), file=fh)
        print('indivname: {0}'.format(ind_file), file=fh)
        print('evecoutname: {0}'.format(evec_file), file=fh)
        print('evaloutname: {0}'.format(eval_file), file=fh)
        print('altnormstyle: NO', file=fh)
        print('numoutevec: 2', file=fh)

def do_smartpca(par_file):
    """Run ``smartpca -p par_file`` and return the statistics sections of its output.

    The sections introduced by '## Average divergence', '## Anova statistics'
    and '## Statistical significance' are collected, each up to the next
    blank line, and returned as one string with an empty line between them.
    """
    prog = 'smartpca'

    args = [ prog ]
    args.append('-p')
    args.append(par_file)

    #print "args: ", ' '.join(args)
    # stderr is passed straight through, so communicate() yields no stderr data
    p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=subprocess.PIPE, stderr=sys.stderr)
    (stdoutdata, stderrdata) = p.communicate()
    rc = p.returncode

    if rc != 0:
        _report_failure(rc, args, stderrdata)

    stats = []

    save_line = False
    for line in stdoutdata.split('\n'):
        if line.startswith(('## Average divergence', '## Anova statistics', '## Statistical significance')):
            stats.append('')
            save_line = True
        if line.strip() == '':
            save_line = False
        if save_line:
            stats.append(line)

    # drop the separator that precedes the first section
    return '\n'.join(stats[1:])

def do_ploteig(evec_file, population_names):
    """Run gd_ploteig on ``evec_file`` for components 1:2 and the given populations."""
    prog = 'gd_ploteig'

    args = [ prog ]
    args.append('-i')
    args.append(evec_file)
    args.append('-c')
    args.append('1:2')
    args.append('-p')
    args.append(':'.join(population_names))
    args.append('-x')

    run_program(None, args)

def do_eval2pct(eval_file, explained_file):
    """Run ``eval2pct eval_file`` and store its stdout in ``explained_file``."""
    prog = 'eval2pct'

    args = [ prog ]
    args.append(eval_file)

    #print "args:", ' '.join(args)
    _run_to_file(args, explained_file)

def do_coords2admix(coords_file):
    """Replace ``coords_file`` with the output of ``coords2admix coords_file``."""
    prog = 'coords2admix'

    args = [ prog ]
    args.append(coords_file)

    # The program reads coords_file while running, so its output goes to a
    # scratch file first.
    scratch_file = 'fake'

    #print "args:", ' '.join(args)
    _run_to_file(args, scratch_file)

    shutil.copy2(scratch_file, coords_file)
    # do not leave the scratch file behind in the working directory
    os.unlink(scratch_file)

################################################################################

if len(sys.argv) != 5:
    print("usage")
    sys.exit(1)

input, input_files_path, output, output_files_path = sys.argv[1:5]

mkdir_p(output_files_path)

ped_file = os.path.join(input_files_path, 'admix.ped')
geno_file = os.path.join(output_files_path, 'admix.geno')
do_ped2geno(ped_file, geno_file)

map_file = os.path.join(input_files_path, 'admix.map')
snp_file = os.path.join(output_files_path, 'admix.snp')
do_map2snp(map_file, snp_file)

ind_file = os.path.join(output_files_path, 'admix.ind')
population_names = make_ind_file(ind_file, input)

par_file = os.path.join(output_files_path, 'par.admix')
evec_file = os.path.join(output_files_path, 'coordinates.txt')
eval_file = os.path.join(output_files_path, 'admix.eval')
make_par_file(par_file, geno_file, snp_file, ind_file, evec_file, eval_file)

smartpca_stats = do_smartpca(par_file)

do_ploteig(evec_file, population_names)
plot_file = 'coordinates.txt.1:2.{0}.pdf'.format(':'.join(population_names))
output_plot_file = os.path.join(output_files_path, 'PCA.pdf')
shutil.copy2(plot_file, output_plot_file)
os.unlink(plot_file)

do_eval2pct(eval_file, os.path.join(output_files_path, 'explained.txt'))
os.unlink(eval_file)

do_coords2admix(evec_file)

################################################################################

info_page = gd_composite.InfoPage()
info_page.set_title('PCA Galaxy Composite Dataset')

display_file = gd_composite.DisplayFile()
display_value = gd_composite.DisplayValue()

out_pdf = gd_composite.Parameter(name='PCA.pdf', value='PCA.pdf', display_type=display_file)
out_evec = gd_composite.Parameter(name='coordinates.txt', value='coordinates.txt', display_type=display_file)
out_explained = gd_composite.Parameter(name='explained.txt', value='explained.txt', display_type=display_file)

evec_prefix = 'coordinates.txt.1:2.{0}'.format(':'.join(population_names))
ps_file = '{0}.ps'.format(evec_prefix)
xtxt_file = '{0}.xtxt'.format(evec_prefix)

os.unlink(os.path.join(output_files_path, ps_file))
os.unlink(os.path.join(output_files_path, xtxt_file))

info_page.add_output_parameter(out_pdf)
info_page.add_output_parameter(out_evec)
info_page.add_output_parameter(out_explained)

in_admix = gd_composite.Parameter(name='par.admix', value='par.admix', display_type=display_file)
in_geno = gd_composite.Parameter(name='admix.geno', value='admix.geno', display_type=display_file)
in_snp = gd_composite.Parameter(name='admix.snp', value='admix.snp', display_type=display_file)
in_ind = gd_composite.Parameter(name='admix.ind', value='admix.ind', display_type=display_file)

info_page.add_input_parameter(in_admix)
info_page.add_input_parameter(in_geno)
info_page.add_input_parameter(in_snp)
info_page.add_input_parameter(in_ind)

misc_stats = gd_composite.Parameter(description='Stats<p/><pre>\n{0}\n</pre>'.format(smartpca_stats), display_type=display_value)

info_page.add_misc(misc_stats)

with open(output, 'w') as ofh:
    print(info_page.render(), file=ofh)

sys.exit(0)
#!/usr/bin/env python
# phylogenetic_tree.py
"""Build a phylogenetic tree from a SNP table and write its Galaxy info page.

Usage: phylogenetic_tree.py <input> <p1_input|all_individuals> <output>
           <extra_files_path> <minimum_coverage> <minimum_quality> <dbkey>
           <data_source> <draw_tree_options> <individual_tag>...

The script chains four external programs -- ``dist_mat``, ``quicktree``,
``draw_tree`` and ``ps2pdf`` -- each reading the previous one's output,
and then writes an HTML info page listing the produced files and the
parameters that were used.
"""

from __future__ import print_function

import os
import errno
import sys
import subprocess
import shutil
from Population import Population
import gd_composite

################################################################################

def mkdir_p(path):
    """Create ``path`` and any missing parents; an existing path is not an error."""
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

################################################################################

if len(sys.argv) < 11:
    print("Usage", file=sys.stderr)
    sys.exit(1)

input, p1_input, output, extra_files_path, minimum_coverage, minimum_quality, dbkey, data_source, draw_tree_options = sys.argv[1:10]

individual_metadata = sys.argv[10:]

# note: TEST THIS
if dbkey in ['', '?', 'None']:
    dbkey = 'none'

p_total = Population()
p_total.from_tag_list(individual_metadata)


################################################################################

mkdir_p(extra_files_path)

################################################################################

def run_program(prog, args, ofh):
    """Run ``args`` with stdout sent to the open file ``ofh``, then close ``ofh``.

    ``ofh`` is closed even when the program cannot be started.  On a
    non-zero exit status the program's stderr is echoed and the script
    exits with status 1.
    """
    #print "args: ", ' '.join(args)
    try:
        p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=ofh, stderr=subprocess.PIPE)
        (stdoutdata, stderrdata) = p.communicate()
        rc = p.returncode
    finally:
        ofh.close()

    if rc != 0:
        #print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args))
        print(stderrdata, file=sys.stderr)
        sys.exit(1)

################################################################################

phylip_outfile = os.path.join(extra_files_path, 'distance_matrix.phylip')
newick_outfile = os.path.join(extra_files_path, 'phylogenetic_tree.newick')
ps_outfile = 'tree.ps'
pdf_outfile = os.path.join(extra_files_path, 'tree.pdf')

################################################################################

informative_snp_file = os.path.join(extra_files_path, 'informative_snps.txt')
mega_distance_matrix_file = os.path.join(extra_files_path, 'mega_distance_matrix.txt')

prog = 'dist_mat'

args = []
args.append(prog)
args.append(input)
args.append(minimum_coverage)
args.append(minimum_quality)
args.append(dbkey)
args.append(data_source)
args.append(informative_snp_file)
args.append(mega_distance_matrix_file)

if p1_input == "all_individuals":
    tags = p_total.tag_list()
else:
    p1 = Population()
    p1.from_population_file(p1_input)
    if not p_total.is_superset(p1):
        print('There is an individual in the population that is not in the SNP table', file=sys.stderr)
        sys.exit(1)
    tags = p1.tag_list()

for tag in tags:
    args.append(tag)

fh = open(phylip_outfile, 'w')
run_program(None, args, fh)

################################################################################

prog = 'quicktree'

args = []
args.append(prog)
args.append('-in')
args.append('m')
args.append('-out')
args.append('t')
args.append(phylip_outfile)

fh = open(newick_outfile, 'w')
run_program(None, args, fh)

################################################################################

prog = 'draw_tree'

args = []
args.append(prog)
if draw_tree_options:
    args.append(draw_tree_options)
args.append(newick_outfile)

fh = open(ps_outfile, 'w')
run_program(None, args, fh)

################################################################################

prog = 'ps2pdf'

args = []
args.append(prog)
args.append('-dPDFSETTINGS=/prepress')
args.append(ps_outfile)
args.append('-')

fh = open(pdf_outfile, 'w')
run_program(None, args, fh)

# NOTE(review): `output` is rewritten with the HTML info page at the end of
# this script, so this copy looks redundant -- confirm before removing.
shutil.copyfile(pdf_outfile, output)

################################################################################

info_page = gd_composite.InfoPage()
info_page.set_title('Phylogenetic tree Galaxy Composite Dataset')

display_file = gd_composite.DisplayFile()
display_value = gd_composite.DisplayValue()

out_pdf = gd_composite.Parameter(name='tree.pdf', value='tree.pdf', display_type=display_file)
out_newick = gd_composite.Parameter(value='phylogenetic_tree.newick', name='phylogenetic tree (newick)', display_type=display_file)
out_phylip = gd_composite.Parameter(value='distance_matrix.phylip', name='Phylip distance matrix', display_type=display_file)
out_mega = gd_composite.Parameter(value='mega_distance_matrix.txt', name='Mega distance matrix', display_type=display_file)
out_snps = gd_composite.Parameter(value='informative_snps.txt', name='informative SNPs', display_type=display_file)

info_page.add_output_parameter(out_pdf)
info_page.add_output_parameter(out_newick)
info_page.add_output_parameter(out_phylip)
info_page.add_output_parameter(out_mega)
info_page.add_output_parameter(out_snps)

in_min_cov = gd_composite.Parameter(description='Minimum coverage', value=minimum_coverage, display_type=display_value)
in_min_qual = gd_composite.Parameter(description='Minimum quality', value=minimum_quality, display_type=display_value)

include_ref_value = 'no'
if dbkey != 'none':
    include_ref_value = 'yes'

in_include_ref = gd_composite.Parameter(description='Include reference sequence', value=include_ref_value, display_type=display_value)

# An unrecognised code is shown as given rather than leaving
# data_source_value undefined (which raised NameError below).
data_source_labels = {
    '0': 'sequence coverage',
    '1': 'estimated genotype',
}
data_source_value = data_source_labels.get(data_source, data_source)

in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value)

# draw_tree_options is either empty or a single flag group; each letter
# present switches one setting away from its default.
branch_type_value = 'square'
if 'd' in draw_tree_options:
    branch_type_value = 'diagonal'

in_branch_type = gd_composite.Parameter(description='Branch type', value=branch_type_value, display_type=display_value)

branch_scale_value = 'yes'
if 's' in draw_tree_options:
    branch_scale_value = 'no'

in_branch_scale = gd_composite.Parameter(description='Draw branches to scale', value=branch_scale_value, display_type=display_value)

branch_length_value = 'yes'
if 'b' in draw_tree_options:
    branch_length_value = 'no'

in_branch_length = gd_composite.Parameter(description='Show branch lengths', value=branch_length_value, display_type=display_value)

tree_layout_value = 'horizontal'
if 'v' in draw_tree_options:
    tree_layout_value = 'vertical'

in_tree_layout = gd_composite.Parameter(description='Tree layout', value=tree_layout_value, display_type=display_value)

info_page.add_input_parameter(in_min_cov)
info_page.add_input_parameter(in_min_qual)
info_page.add_input_parameter(in_include_ref)
info_page.add_input_parameter(in_data_source)
info_page.add_input_parameter(in_branch_type)
info_page.add_input_parameter(in_branch_scale)
info_page.add_input_parameter(in_branch_length)
info_page.add_input_parameter(in_tree_layout)

misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList())

info_page.add_misc(misc_individuals)


with open(output, 'w') as ofh:
    print(info_page.render(), file=ofh)

################################################################################

sys.exit(0)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phylogenetic_tree.xml Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,168 @@ +<tool id="gd_phylogenetic_tree" name="Phylogenetic" version="1.0.0"> + <description>tree</description> + + <command interpreter="python"> + phylogenetic_tree.py "$input" + #if $individuals.choice == '0' + "all_individuals" + #else if $individuals.choice == '1' + "$p1_input" + #end if + "$output" "$output.extra_files_path" "$minimum_coverage" "$minimum_quality" + #if ((str($input.metadata.scaffold) == str($input.metadata.ref)) and (str($input.metadata.pos) == str($input.metadata.rPos))) or (str($include_reference) == '0') + "none" + #else + "$input.metadata.dbkey" + #end if + "$data_source" + #set $draw_tree_options = ''.join(str(x) for x in [$branch_style, $scale_style, $length_style, $layout_style]) + #if $draw_tree_options == '' + "" + #else + "-$draw_tree_options" + #end if + #for $individual_name, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns) + #set $arg = '%s:%s' % ($individual_col, $individual_name) + "$arg" + #end for + </command> + + <inputs> + <param name="input" type="data" format="wsf" label="SNP table" /> + + <conditional name="individuals"> + <param name="choice" type="select" label="Individuals"> + <option value="0" selected="true">All</option> + <option value="1">Individuals in a population</option> + </param> + <when value="0" /> + <when value="1"> + <param name="p1_input" type="data" format="ind" label="Population individuals" /> + </when> + </conditional> + + <param name="minimum_coverage" type="integer" min="0" value="0" label="Minimum coverage" help="Note: Minimum coverage and Minimum quality cannot both be 0" /> + + <param name="minimum_quality" type="integer" min="0" value="0" label="Minimum quality" help="Note: Minimum coverage and Minimum quality cannot both be 0" /> + + <param name="include_reference" type="select" format="integer" label="Include reference 
sequence"> + <option value="1" selected="true">Yes</option> + <option value="0">No</option> + </param> + + <param name="data_source" type="select" format="integer" label="Data source"> + <option value="0" selected="true">sequence coverage</option> + <option value="1">estimated genotype</option> + </param> + + <param name="branch_style" type="select" display="radio"> + <label>Branch type</label> + <option value="" selected="true">square</option> + <option value="d">diagonal</option> + </param> + + <param name="scale_style" type="select" display="radio"> + <label>Draw branches to scale</label> + <option value="" selected="true">yes</option> + <option value="s">no</option> + </param> + + <param name="length_style" type="select" display="radio"> + <label>Show branch lengths</label> + <option value="" selected="true">yes</option> + <option value="b">no</option> + </param> + + <param name="layout_style" type="select" display="radio"> + <label>Tree layout</label> + <option value="" selected="true">horizontal</option> + <option value="v">vertical</option> + </param> + </inputs> + + <outputs> + <data name="output" format="html" /> + </outputs> + + <tests> + <test> + <param name="input" value="genome_diversity/test_in/sample.wsf" ftype="wsf" /> + <param name="choice" value="0" /> + <param name="minimum_coverage" value="3" /> + <param name="minimum_quality" value="30" /> + <param name="data_source" value="0" /> + <param name="branch_style" value="" /> + <param name="scale_style" value="" /> + <param name="length_style" value="" /> + <param name="layout_style" value="" /> + <output name="output" file="genome_diversity/test_out/phylogenetic_tree/phylogenetic_tree.html" ftype="html" compare="diff" lines_diff="2"> + <extra_files type="file" name='distance_matrix.phylip' value="genome_diversity/test_out/phylogenetic_tree/distance_matrix.phylip" /> + <extra_files type="file" name='informative_snps.txt' value="genome_diversity/test_out/phylogenetic_tree/informative_snps.txt" /> + 
<extra_files type="file" name='mega_distance_matrix.txt' value="genome_diversity/test_out/phylogenetic_tree/mega_distance_matrix.txt" /> + <extra_files type="file" name='phylogenetic_tree.newick' value="genome_diversity/test_out/phylogenetic_tree/phylogenetic_tree.newick" /> + <extra_files type="file" name='tree.pdf' value="genome_diversity/test_out/phylogenetic_tree/tree.pdf" compare="sim_size" delta = "1000"/> + </output> + </test> + </tests> + + <help> +**What it does** + +This tool uses a SNP table to determine a kind of "genetic distance" between +each pair of individuals. Optionally, that information can be used to +produce a tree-shaped figure that depicts how the individuals are related, +either as a text file in a common format, called NEWICK, or as a picture. +The user specifies the following inputs to the tool. + +SNP table + +Individuals + By default, all individuals are included in the analysis; an option + is to analyze only a subset of individuals that has been specified + using the tool to "Select individuals from a SNP table". + +Minimum coverage + For each pair of individuals, the tool looks for informative SNPs, i.e., + where the sequence data for both individuals is adequate according to + some criterion. Specifying, say, 7 for this option instructs the tool + to consider only SNPs with coverage at least 7 in both individuals + when estimating their "genetic distance". + +Minimum quality + Specifying, say, 37 for this option instructs the tool to consider + only SNPs with SAMtools quality value at least 37 in both individuals + when estimating their "genetic distance". + +Minimum number of informative SNPs + This option instructs the tool to terminate execution if at least one + pair of individuals does not have a required number of informative SNPs. + +Include reference sequence + For SNP tables with a reference sequence, the user can ask that the + reference be indicated in the tree, to help with rooting it. 
If the + SNP table has no reference sequence, this option has no effect. + +Data source + The genetic distance between two individuals at a given SNP can + be estimated in two ways. One method is to use the absolute value of the + difference in the frequency of the first allele (equivalently: the + second allele). For instance, if the first individual has 5 reads of + each allele and the second individual has respectively 3 and 6 reads, + then the frequencies are 1/2 and 1/3, giving a distance of 1/6 at that + SNP. The other approach is to use the SAMtools genotypes to estimate + the difference in the number of occurrences of the first allele. + For instance, if the two genotypes are 2 and 1, i.e., the individuals + are estimated to have respectively 2 and 1 occurrences of the first + allele at this location, then the distance is 1 (the absolute value + of the difference of the two numbers). + +Output format + There are three options, as described above. + +**Acknowledgments** + +To convert the distance matrix to a NEWICK-formatted tree, we use the QuickTree program, downloaded from: http://www.sanger.ac.uk/resources/software/quicktree/ + +To draw the tree, we use the program draw_tree, downloaded from: http://compgen.bscb.cornell.edu/phast/ + </help> +</tool>
#!/usr/bin/env python
# population_structure.py
"""Run admixture on a ped file and plot the resulting ancestry estimates.

Usage: population_structure.py <input_html> <input_ped> <output_html>
           <extra_files_path> <populations>

``admixture`` is run in the current directory; its ``.Q`` output is kept
as ``numeric.txt``, the R script ``population_structure.r`` (located next
to this file) renders it to ``graphical.pdf``, and an HTML info page is
written to ``output_html``.
"""

from __future__ import print_function

import errno
import os
import shutil
import subprocess
import sys
from BeautifulSoup import BeautifulSoup
import gd_composite

################################################################################

def die(message):
    """Print ``message`` on stderr and exit with status 1."""
    print(message, file=sys.stderr)
    sys.exit(1)

def run_admixture(ped_file, populations):
    """Run ``admixture ped_file populations``, discarding its stdout.

    Exits with status 1 if admixture reports failure; returns its exit
    status (0) otherwise.
    """
    prog = 'admixture'

    args = []
    args.append(prog)
    # use the parameter: the previous code read the module-level
    # input_ped_file and silently ignored its own argument
    args.append(ped_file)
    args.append(populations)

    #print "args:", ' '.join(args)
    with open(os.devnull, 'w') as ofh:
        p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=ofh, stderr=sys.stderr)
        rc = p.wait()

    if rc != 0:
        # without this check the failure only surfaced later as a missing .Q file
        die('FAILED: rc={0}: {1}'.format(rc, ' '.join(args)))

    return rc

def run_r(input_file, output_file, populations):
    """Feed population_structure.r to R with the three values as script arguments.

    The R script is read from the directory containing this file and is
    passed on R's stdin.  Exits with status 1 if R reports failure;
    returns its exit status (0) otherwise.
    """
    prog = 'R'

    args = []
    args.append(prog)
    args.append('--vanilla')
    args.append('--quiet')
    args.append('--args')
    args.append(input_file)
    args.append(output_file)
    args.append(populations)

    _realpath = os.path.realpath(__file__)
    _script_dir = os.path.dirname(_realpath)
    r_script_file = os.path.join(_script_dir, 'population_structure.r')

    with open(r_script_file) as ifh:
        with open(os.devnull, 'w') as ofh:
            p = subprocess.Popen(args, bufsize=-1, stdin=ifh, stdout=ofh, stderr=None)
            rc = p.wait()

    if rc != 0:
        die('FAILED: rc={0}: {1}'.format(rc, ' '.join(args)))

    return rc

def mkdir_p(path):
    """Create ``path`` and any missing parents; an existing path is not an error."""
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

def get_populations(input):
    """Return the population list markup from the HTML file ``input``.

    The result is the text 'Populations' followed, on the next line, by the
    first ``<ul>`` found inside ``<div id="gd_misc">``.
    """
    with open(input) as fh:
        soup = BeautifulSoup(fh)
        misc = soup.find('div', {'id': 'gd_misc'})

    return 'Populations\n{0}'.format(misc('ul')[0])

################################################################################

if len(sys.argv) != 6:
    print("Usage", file=sys.stderr)
    sys.exit(1)

input_html_file, input_ped_file, output_file, extra_files_path, populations = sys.argv[1:6]
populations_html = get_populations(input_html_file)

run_admixture(input_ped_file, populations)

# admixture names its outputs after the ped file's base name (without the
# .ped extension) and the population count, in the current directory
ped_base = os.path.basename(input_ped_file)
if ped_base.endswith('.ped'):
    ped_base = ped_base[:-4]

p_file = '%s.%s.P' % (ped_base, populations)
q_file = '%s.%s.Q' % (ped_base, populations)

mkdir_p(extra_files_path)
numeric_output_file = os.path.join(extra_files_path, 'numeric.txt')
shutil.copy2(q_file, numeric_output_file)
os.remove(p_file)
os.remove(q_file)

graphical_output_file = os.path.join(extra_files_path, 'graphical.pdf')
run_r(numeric_output_file, graphical_output_file, populations)

################################################################################

info_page = gd_composite.InfoPage()
info_page.set_title('Population structure Galaxy Composite Dataset')

display_file = gd_composite.DisplayFile()
display_value = gd_composite.DisplayValue()

out_pdf = gd_composite.Parameter(name='graphical.pdf', value='graphical.pdf', display_type=display_file)
out_txt = gd_composite.Parameter(name='numeric.txt', value='numeric.txt', display_type=display_file)

info_page.add_output_parameter(out_pdf)
info_page.add_output_parameter(out_txt)

in_pops = gd_composite.Parameter(description='Number of populations', value=populations, display_type=display_value)

info_page.add_input_parameter(in_pops)

misc_pops = gd_composite.Parameter(description=populations_html, display_type=display_value)

info_page.add_misc(misc_pops)


with open(output_file, 'w') as ofh:
    print(info_page.render(), file=ofh)


sys.exit(0)
# population_structure.r
#
# Draw a stacked bar plot of ancestry fractions.
#
# Arguments (after --args):
#   1. q_file       table of ancestry fractions, one row per individual
#   2. output_file  PDF file to create
#   3. populations  number of populations (one colour is used for each)

library(RColorBrewer)

args = commandArgs(trailingOnly=TRUE)
q_file = args[[1]]
output_file = args[[2]]
# commandArgs() returns character strings.  Without the conversion the range
# test below compares strings lexically (so "10" >= "3" is FALSE) and
# brewer.pal() is handed a character value instead of a count.
populations = as.integer(args[[3]])

if ( is.na(populations) || populations < 1 ) {
    stop("populations must be a positive integer")
}

tbl <- read.table(q_file)

# The 'Paired' palette provides between 3 and 12 colours; outside that
# range fall back to rainbow().
if ( populations >= 3 && populations <= 12 ) {
    colors = brewer.pal(populations, 'Paired')
} else {
    colors = rainbow(populations)
}

pdf(file=output_file, onefile=TRUE, width=7, height=3)
barplot(t(as.matrix(tbl)), col=colors, xlab="Individual #", ylab="Ancestry", border=NA)

dev.off()
#!/usr/bin/env python
# prepare_population_structure.py
"""Convert a SNP table into admix.ped / admix.map for population-structure tools.

Usage: prepare_population_structure.py <input_snp_table> <min_reads>
           <min_qual> <min_spacing> <output_html> <output_files_path>
           (all_individuals | population:<file>:<name>...)
           individual:<column>:<name>...

``admix_prep`` is run in the current directory; the ``admix.ped`` and
``admix.map`` files it leaves there are moved into ``output_files_path``
and an HTML info page is written to ``output_html``.
"""

from __future__ import print_function

import errno
import os
import shutil
import subprocess
import sys
from Population import Population
import gd_composite

################################################################################

def do_import(filename, files_path, min_reads, min_qual, min_spacing, tags, using_info, population_list):
    """Write the HTML info page for this run to ``filename``.

    The page lists the two output files, the summary text ``using_info``,
    the three filter parameters and the populations in ``population_list``.
    ``files_path`` and ``tags`` are accepted but not used here.
    """
    info_page = gd_composite.InfoPage()
    info_page.set_title('Prepare to look for population structure Galaxy Composite Dataset')

    display_file = gd_composite.DisplayFile()
    display_value = gd_composite.DisplayValue()

    out_ped = gd_composite.Parameter(name='admix.ped', value='admix.ped', display_type=display_file)
    out_map = gd_composite.Parameter(name='admix.map', value='admix.map', display_type=display_file)
    out_use = gd_composite.Parameter(description=using_info, display_type=display_value)

    info_page.add_output_parameter(out_ped)
    info_page.add_output_parameter(out_map)
    info_page.add_output_parameter(out_use)

    in_min_reads = gd_composite.Parameter(description='Minimum reads covering a SNP, per individual', value=min_reads, display_type=display_value)
    in_min_qual = gd_composite.Parameter(description='Minimum quality value, per individual', value=min_qual, display_type=display_value)
    in_min_spacing = gd_composite.Parameter(description='Minimum spacing between SNPs on the same scaffold', value=min_spacing, display_type=display_value)

    info_page.add_input_parameter(in_min_reads)
    info_page.add_input_parameter(in_min_qual)
    info_page.add_input_parameter(in_min_spacing)

    misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList())
    info_page.add_misc(misc_populations)

    with open(filename, 'w') as ofh:
        print(info_page.render(), file=ofh)

def mkdir_p(path):
    """Create ``path`` and any missing parents; an existing path is not an error."""
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

def die(message, exit=True):
    """Print ``message`` on stderr and, unless ``exit`` is false, exit with status 1."""
    print(message, file=sys.stderr)
    if exit:
        sys.exit(1)

################################################################################

if len(sys.argv) < 9:
    die("Usage")

# parse command line
input_snp_filename, min_reads, min_qual, min_spacing, output_filename, output_files_path = sys.argv[1:7]
args = sys.argv[7:]

individual_metadata = []
population_files = []
population_names = []
all_individuals = False

# Both recognised prefixes, 'individual:' and 'population:', are 11
# characters long; arguments with nothing after the prefix are ignored.
for arg in args:
    if arg == 'all_individuals':
        all_individuals = True
    elif len(arg) > 11:
        tag = arg[:11]
        value = arg[11:]
        if tag == 'individual:':
            individual_metadata.append(value)
        elif tag == 'population:':
            if ':' not in value:
                die('Malformed population argument: {0}'.format(arg))
            filename, name = value.split(':', 1)
            population_files.append(filename)
            population_names.append(name)

p_total = Population()
p_total.from_tag_list(individual_metadata)

population_list = []

if all_individuals:
    p1 = p_total
    p1.name = 'All Individuals'
    population_list.append(p1)
else:
    # p1 accumulates the individuals of every requested population
    p1 = Population()
    for population_file, population_name in zip(population_files, population_names):
        this_pop = Population(population_name)
        this_pop.from_population_file(population_file)
        population_list.append(this_pop)
        p1.from_population_file(population_file)

if not p_total.is_superset(p1):
    print('There is an individual in the population that is not in the SNP table', file=sys.stderr)
    sys.exit(1)

# run tool
prog = 'admix_prep'

args = []
args.append(prog)
args.append(input_snp_filename)
args.append(min_reads)
args.append(min_qual)
args.append(min_spacing)

tags = p1.tag_list()
for tag in tags:
    args.append(tag)

#print "args:", ' '.join(args)
# stderr is passed straight through, so only stdout is captured
p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=subprocess.PIPE, stderr=sys.stderr)
(stdoutdata, stderrdata) = p.communicate()
rc = p.returncode

if rc != 0:
    die('admix_prep failed: rc={0}'.format(rc))

using_info = stdoutdata.rstrip('\r\n')
mkdir_p(output_files_path)
output_ped_filename = os.path.join(output_files_path, 'admix.ped')
output_map_filename = os.path.join(output_files_path, 'admix.map')
shutil.copy2('admix.ped', output_ped_filename)
shutil.copy2('admix.map', output_map_filename)
do_import(output_filename, output_files_path, min_reads, min_qual, min_spacing, tags, using_info, population_list)

os.unlink('admix.ped')
os.unlink('admix.map')

sys.exit(0)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/prepare_population_structure.xml Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,74 @@ +<tool id="gd_prepare_population_structure" name="Prepare" version="1.0.0"> + <description>to look for population structure</description> + + <command interpreter="python"> + prepare_population_structure.py "$input" "$min_reads" "$min_qual" "$min_spacing" "$output" "$output.files_path" + #if $individuals.choice == '0' + "all_individuals" + #else if $individuals.choice == '1' + #for $population in $individuals.populations + #set $pop_arg = 'population:%s:%s' % (str($population.p_input), str($population.p_input.name)) + "$pop_arg" + #end for + #end if + #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns) + #set $arg = 'individual:%s:%s' % ($individual_col, $individual) + "$arg" + #end for + </command> + + <inputs> + <param name="input" type="data" format="wsf" label="SNP table" /> + <param name="min_reads" type="integer" min="0" value="0" label="Minimum reads covering a SNP, per individual" /> + <param name="min_qual" type="integer" min="0" value="0" label="Minimum quality value, per individual" /> + <param name="min_spacing" type="integer" min="0" value="0" label="Minimum spacing between SNPs on the same scaffold" /> + <conditional name="individuals"> + <param name="choice" type="select" label="Individuals"> + <option value="0" selected="true">All</option> + <option value="1">Choose</option> + </param> + <when value="0" /> + <when value="1"> + <repeat name="populations" title="Population" min="1"> + <param name="p_input" type="data" format="ind" label="Individuals" /> + </repeat> + </when> + </conditional> + </inputs> + + <outputs> + <data name="output" format="wped"> + <actions> + <action type="metadata" name="base_name" default="admix" /> + </actions> + </data> + </outputs> + + <tests> + <test> + <param name="input" value="genome_diversity/test_in/sample.wsf" 
ftype="wsf" /> + <param name="min_reads" value="3" /> + <param name="min_qual" value="30" /> + <param name="min_spacing" value="0" /> + <param name="choice" value="0" /> + <output name="output" file="genome_diversity/test_out/prepare_population_structure/prepare_population_structure.html" ftype="html" compare="diff" lines_diff="2"> + <extra_files type="file" name='admix.map' value="genome_diversity/test_out/prepare_population_structure/admix.map" /> + <extra_files type="file" name='admix.ped' value="genome_diversity/test_out/prepare_population_structure/admix.ped" /> + </output> + </test> + </tests> + + <help> +**What it does** + +The tool converts a SNP table into two tables, called "admix.map" and +"admix.ped", needed for estimating the population structure. The user +can read or download those files, or simply pass this tool's output on to +other programs. The user imposes conditions on which SNPs to consider, +such as the minimum coverage and/or quality value for every individual, +or the distance to the closest SNP in the same contig (as named in the +first column of the SNP table). A useful piece of information produced +by the tool is the number of SNPs meeting those conditions, which can +be found by clicking on the "eye" after the program runs. + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rank_pathways.xml Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,74 @@ +<tool id="gd_calc_freq" name="Rank" version="1.0.0"> + <description>affected KEGG pathways</description> + + <command interpreter="python"> + #if str($output_format) == 'a' + calctfreq.py + #else if str($output_format) == 'b' + calclenchange.py + #end if + "--loc_file=${GALAXY_DATA_INDEX_DIR}/gd.rank.loc" + "--species=${input.metadata.dbkey}" + "--input=${input}" + "--output=${output}" + "--posKEGGclmn=${input.metadata.kegg_path}" + "--KEGGgeneposcolmn=${input.metadata.kegg_gene}" + </command> + + <inputs> + <param name="input" type="data" format="wpf" label="Table"> + <validator type="metadata" check="kegg_gene,kegg_path" message="Missing KEGG gene code column and/or KEGG pathway code/name column metadata. Click the pencil icon in the history item to edit/save the metadata attributes" /> + </param> + <param name="output_format" type="select" label="Output format"> + <option value="a" selected="true">ranked by percentage of genes affected</option> + <option value="b">ranked by change in length and number of paths</option> + </param> + </inputs> + + <outputs> + <data name="output" format="tabular" /> + </outputs> + + <tests> + <test> + <param name="input" value="genome_diversity/test_in/sample.wpf" ftype="wpf" /> + <param name="output_format" value="a" /> + <output name="output" file="genome_diversity/test_out/rank_pathways/rank_pathways.tabular" /> + </test> + </tests> + + <help> +**What it does** + +This tool produces a table ranking the pathways based on the percentage +of genes in an input dataset, out of the total in each pathway. +Alternatively, the tool ranks the pathways based on the change in +length and number of paths connecting sources and sinks. This change is +calculated between graphs representing pathways with and without excluding +the nodes that represent the genes in an input list. 
Sources are all +the nodes representing the initial reactants/products in the pathway. +Sinks are all the nodes representing the final reactants/products in +the pathway. + +If pathways are ranked by percentage of genes affected, the output is +a tabular dataset with the following columns: + + 1. number of genes in the pathway present in the input dataset + 2. percentage of the total genes in the pathway included in the input dataset + 3. rank of the frequency (from high freq to low freq) + 4. name of the pathway + +If pathways are ranked by change in length and number of paths, the +output is a tabular dataset with the following columns: + + 1. change in the mean length of paths between sources and sinks + 2. mean length of paths between sources and sinks in the pathway including the genes in the input dataset. If the pathway does not have sources/sinks, the length is assumed to be infinite (I) + 3. mean length of paths between sources and sinks in the pathway excluding the genes in the input dataset. If the pathway does not have sources/sinks, the length is assumed to be infinite (I) + 4. rank of the change in the mean length of paths between sources and sinks (from high change to low change) + 5. change in the number of paths between sources and sinks + 6. number of paths between sources and sinks in the pathway including the genes in the input dataset. If the pathway does not have sources/sinks, it is assumed to be a circuit (C) + 7. number of paths between sources and sinks in the pathway excluding the genes in the input dataset. If the pathway does not have sources/sinks, it is assumed to be a circuit (C) + 8. rank of the change in the number of paths between sources and sinks (from high change to low change) + 9. name of the pathway + </help> +</tool>
#!/usr/bin/env python
"""rename_individuals.py -- record aliases for the individuals of a population file.

Usage::

    rename_individuals.py INPUT OUTPUT [COLUMN:ALIAS ...]

INPUT is loaded with ``Population.from_population_file``.  One line is written
to OUTPUT for every column reported by ``Population.column_list()``; the line
has three tab-separated fields: the column, the individual's real name and the
alias (left empty when no alias was given or when it equals the real name).
"""

import sys


def parse_aliases(tags):
    """Return a ``{column: alias}`` dict built from ``COLUMN:ALIAS`` strings.

    Only the first ':' separates the two parts, so an alias may itself contain
    colons.  Both parts are stripped of surrounding whitespace, and tags whose
    alias is empty after stripping are ignored.

    Raises:
        ValueError: if a tag contains no ':' at all.
    """
    aliases = {}
    for tag in tags:
        column, separator, alias = tag.partition(':')
        if not separator:
            raise ValueError(
                'malformed rename argument %r: expected COLUMN:ALIAS' % (tag,))
        alias = alias.strip()
        if alias:
            aliases[column.strip()] = alias
    return aliases


def build_rows(population, aliases):
    """Return one ``[column, real_name, alias]`` list per population column.

    ``population`` only needs to provide ``column_list()`` and
    ``individual_with_column(column)`` (whose result has a ``real_name``
    attribute).  The alias field is '' when the column has no entry in
    ``aliases`` or when the alias is identical to the real name.
    """
    rows = []
    for column in population.column_list():
        real_name = population.individual_with_column(column).real_name
        alias = aliases.get(column, '')
        if alias == real_name:
            # Renaming an individual to its own name is not a rename.
            alias = ''
        rows.append([column, real_name, alias])
    return rows


def main(argv=None):
    """Run the tool on ``argv`` (defaults to ``sys.argv``); return the exit code."""
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        sys.stderr.write(
            'usage: %s INPUT OUTPUT [COLUMN:ALIAS ...]\n' % argv[0])
        return 1

    # Imported here so that the helpers above can be used without the
    # project-local Population module being importable.
    from Population import Population

    input_file, output_file = argv[1:3]
    aliases = parse_aliases(argv[3:])

    population = Population()
    population.from_population_file(input_file)

    with open(output_file, 'w') as ofh:
        for row in build_rows(population, aliases):
            ofh.write('\t'.join(row) + '\n')
    return 0


if __name__ == '__main__':
    sys.exit(main())
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rename_individuals.xml Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,34 @@ +<tool id="gd_rename_individuals" name="Rename" version="1.0.0"> + <description>individuals</description> + + <command interpreter="python"> + rename_individuals.py "$input" "$output" + #for $individual in $individuals + #set $arg = '%s:%s' % (str($individual.column), str($individual.alias)) + "$arg" + #end for + </command> + + <inputs> + <param name="input" type="data" format="ind"/> + + <repeat name="individuals" title="Rename individual" min="1"> + <param name="column" type="select" label="Choose individual to rename"> + <options from_dataset="input"> + <column name="name" index="1"/> + <column name="value" index="0"/> + </options> + </param> + <param name="alias" type="text" label="Enter the new name" /> + </repeat> + </inputs> + + <outputs> + <data name="output" format="ind" label="Individuals from ${input.hid}" /> + </outputs> + + <help> +**What it does** + + </help> +</tool>
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#       rtrnKEGGpthwfENSEMBLTc.py
#
#       Copyright 2011 Oscar Bedoya-Reina <oscar@niska.bx.psu.edu>
#
#       This program is free software; you can redistribute it and/or modify
#       it under the terms of the GNU General Public License as published by
#       the Free Software Foundation; either version 2 of the License, or
#       (at your option) any later version.
#
#       This program is distributed in the hope that it will be useful,
#       but WITHOUT ANY WARRANTY; without even the implied warranty of
#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#       GNU General Public License for more details.
#
#       You should have received a copy of the GNU General Public License
#       along with this program; if not, write to the Free Software
#       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#       MA 02110-1301, USA.
"""Append KEGG gene codes and KEGG pathways to a table of ENSEMBL transcripts.

The .loc file maps a species (first whitespace-separated field) to the path of
a conversion dictionary file (second field).  Each non-blank line of that
dictionary is ``<transcript code>TAB<remaining fields>``; the remaining fields
are appended verbatim to every input row whose transcript code matches.  Rows
whose code is not in the dictionary get the two placeholder fields ``U`` and
``N`` instead.
"""

import argparse
import os
import sys

# Fields appended to rows whose transcript code is not in the dictionary.
UNKNOWN_ANNOTATION = '\t'.join(['U', 'N'])


def find_conversion_file(loc_file, species):
    """Return the conversion-dictionary path listed for ``species`` in ``loc_file``.

    The first line whose first whitespace-separated field equals ``species``
    wins.  Blank lines are ignored.

    Raises:
        IndexError: if no line matches, or the matching line has no second
            field.
    """
    with open(loc_file) as fh:
        rows = fh.read().splitlines()
    for row in rows:
        fields = row.split()
        if fields and fields[0] == species:
            if len(fields) < 2:
                raise IndexError(
                    'entry for species %r in %s has no conversion file'
                    % (species, loc_file))
            return fields[1]
    raise IndexError('species %r not found in %s' % (species, loc_file))


def load_conversion_table(path):
    """Return ``{first field: rest of the line}`` for the non-blank lines of ``path``.

    Lines are split at the first tab only, so the value keeps the remaining
    fields tab-joined exactly as they appear in the file.  When a key occurs
    more than once the last occurrence wins.
    """
    table = {}
    with open(path) as fh:
        for row in fh.read().splitlines():
            if row.strip():
                key, _, rest = row.partition('\t')
                table[key] = rest
    return table


def read_rows(path):
    """Return the tab-split rows of ``path``, skipping lines that start with '#'."""
    rows = []
    with open(path) as fh:
        for line in fh:
            if line.startswith('#'):
                continue
            rows.append(line.rstrip('\r\n').split('\t'))
    return rows


def annotate_rows(rows, column_idx, table):
    """Return each row as a tab-joined string with its annotation appended.

    ``column_idx`` is the 0-based index of the transcript-code column.  An
    ``IndexError`` propagates when a row is too short to have that column.
    """
    annotated = []
    for fields in rows:
        annotation = table.get(fields[column_idx], UNKNOWN_ANNOTATION)
        annotated.append('\t'.join(['\t'.join(fields), annotation]))
    return annotated


def main(argv=None):
    """Parse ``argv`` (defaults to ``sys.argv[1:]``), write the output, return 0."""
    parser = argparse.ArgumentParser(description='Adds the fields KEGG gene codes and KEGG pathways to an input table of ENSEMBL transcript codes.')
    parser.add_argument('--loc_file', metavar='correlational database', type=str, required=True, help='correlational database')
    parser.add_argument('--species', metavar='species name', type=str, required=True, help='the species of interest in loc_file')
    parser.add_argument('--output', metavar='output TXT file', type=str, required=True, help='the output file with the table in txt format. The output will have two more fields: KEGG gene codes and KEGG pathways of each ENSEMBL code')
    parser.add_argument('--posENSEMBLclmn', metavar='column number', type=int, required=True, help='the column with the ENSEMBLE transcript code')
    parser.add_argument('--input', metavar='input TXT file', type=str, required=True, help='the input file with the table in txt format')
    args = parser.parse_args(argv)

    if args.posENSEMBLclmn < 1:
        # Column numbers are 1-based; 0 would silently address the last column.
        parser.error('--posENSEMBLclmn must be a positive column number')
    column_idx = args.posENSEMBLclmn - 1

    table = load_conversion_table(
        find_conversion_file(args.loc_file, args.species))
    annotated = annotate_rows(read_rows(args.input), column_idx, table)

    with open(args.output, 'w') as out:
        # No trailing newline, as in the original output format.
        out.write('\n'.join(annotated))
    return 0


if __name__ == '__main__':
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/select_individuals.xml Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,42 @@ +<tool id="gd_select_individuals" name="Select" version="1.0.0"> + <description>individuals from a SNP table</description> + + <command interpreter="bash"> + echo.bash "$input" "$output" + #for $individual in str($individuals).split(',') + #set $individual_idx = $input.dataset.metadata.individual_names.index($individual) + #set $individual_col = str( $input.dataset.metadata.individual_columns[$individual_idx] ) + #set $arg = '\t'.join([$individual_col, $individual, '']) + "$arg" + #end for + </command> + + <inputs> + <param name="input" type="data" format="wsf"/> + <param name="individuals" type="select" display="checkboxes" multiple="true" label="Individuals to include"> + <options> + <filter type="data_meta" ref="input" key="individual_names" /> + </options> + <validator type="no_options" message="You must select at least one individual"/> + </param> + </inputs> + + <outputs> + <data name="output" format="ind" label="Individuals from ${input.hid}" /> + </outputs> + + <tests> + <test> + <param name="input" value="genome_diversity/test_in/sample.wsf" ftype="wsf" /> + <param name="individuals" value="PB1,PB2" /> + <output name="output" file="genome_diversity/test_in/a.ind" /> + </test> + </tests> + + <help> +**What it does** + +The user selects a SNP table and a set of individuals from the table. +The command saves that list of individuals for use by other Galaxy tools. + </help> +</tool>
#!/usr/bin/env python2.5
"""select_restriction_enzymes.py -- keep the SNPs that a chosen enzyme can cut.

Every SNP line of --input is copied to --output when the enzymes reported by
the primers file for that SNP's (scaffold, position) include at least one of
the enzymes named in --enzyme_list.  The input's comment lines are written
once, just before the first selected SNP.

The syntax is kept compatible with Python 2.5 (see the shebang), hence
``sys.exc_info()`` and ``try``/``finally`` rather than ``except ... as`` and
``with``.
"""

import os
import sys
import traceback
from optparse import OptionParser

# (options attribute, command-line flag) for every mandatory option, in the
# order in which they are checked.
REQUIRED_OPTIONS = (
    ( 'input', '--input' ),
    ( 'output', '--output' ),
    ( 'primers_loc', '--primers_loc' ),
    ( 'scaffold_col', '--scaffold_col' ),
    ( 'pos_col', '--pos_col' ),
    ( 'enzyme_list_string', '--enzyme_list' ),
    ( 'species', '--species' ),
)


def main_function( parse_arguments=None ):
    """Return a decorator that turns ``f(options, arguments)`` into a script entry point.

    The decorated function accepts an optional argument list (default
    ``sys.argv``), passes it through ``parse_arguments`` and always terminates
    the interpreter with ``sys.exit``: with the wrapped function's return
    value on success, or with 1 after printing the error and its traceback to
    stderr when the wrapped function raises.
    """
    if parse_arguments is None:
        parse_arguments = lambda arguments: ( None, arguments )
    def main_decorator( to_decorate ):
        def decorated_main( arguments=None ):
            if arguments is None:
                arguments = sys.argv
            options, arguments = parse_arguments( arguments )
            rc = 1
            try:
                rc = to_decorate( options, arguments )
            except Exception:
                err = sys.exc_info()[1]
                sys.stderr.write( 'ERROR: %s\n' % str( err ) )
                traceback.print_exc()
            finally:
                sys.exit( rc )
        return decorated_main
    return main_decorator


def parse_arguments( arguments ):
    """Parse ``arguments[1:]`` and return optparse's ``(options, args)`` pair."""
    parser = OptionParser()
    parser.add_option('--input',
                        type='string', dest='input',
                        help='file of selected SNPs')
    parser.add_option('--output',
                        type='string', dest='output',
                        help='output file')
    parser.add_option('--primers_loc',
                        type='string', dest='primers_loc',
                        help='primers .loc file')
    parser.add_option('--scaffold_col',
                        type="int", dest='scaffold_col',
                        help='scaffold column in the input file')
    parser.add_option('--pos_col',
                        type="int", dest='pos_col',
                        help='position column in the input file')
    parser.add_option('--enzyme_list',
                        type="string", dest='enzyme_list_string',
                        help='comma separated list of enzymes')
    parser.add_option('--species',
                        type="string", dest='species',
                        help='species')
    return parser.parse_args( arguments[1:] )


def parse_enzyme_list( enzyme_list_string ):
    """Return the set of non-empty, whitespace-stripped names in a comma-separated list."""
    names = [ name.strip() for name in enzyme_list_string.split( ',' ) ]
    return set( [ name for name in names if name ] )


@main_function( parse_arguments )
def main( options, arguments ):
    """Copy the SNPs cut by a requested enzyme from --input to --output.

    Raises:
        RuntimeError: if a mandatory option is missing (or has a false value
            such as 0 or the empty string).
    """
    for attribute, flag in REQUIRED_OPTIONS:
        if not getattr( options, attribute ):
            raise RuntimeError( 'missing %s option' % flag )

    # Imported after validation so that argument errors are reported even
    # when the project package is unavailable.
    import genome_diversity as gd

    snps = gd.SnpFile( filename=options.input, seq_col=int( options.scaffold_col ), pos_col=int( options.pos_col ) )

    out_fh = gd._openfile( options.output, 'w' )
    try:
        wanted = parse_enzyme_list( options.enzyme_list_string )

        primer_data_file = gd.get_filename_from_loc( options.species, options.primers_loc )
        file_root, file_ext = os.path.splitext( primer_data_file )
        # The index is expected next to the data file, with a .cdb extension.
        primer_index_file = file_root + ".cdb"
        primers = gd.PrimersFile( data_file=primer_data_file, index_file=primer_index_file )

        comments_printed = False

        while snps.next():
            seq, pos = snps.get_seq_pos()
            enzyme_list = primers.get_enzymes( seq, pos )
            if not any( enzyme in wanted for enzyme in enzyme_list ):
                continue
            if not comments_printed:
                # Comments are emitted lazily, only once a SNP is selected.
                for comment in snps.comments:
                    out_fh.write( "%s\n" % comment )
                comments_printed = True
            out_fh.write( "%s\n" % snps.line )
    finally:
        out_fh.close()

if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/select_restriction_enzymes.xml Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,90 @@ +<tool id="gd_select_restriction_enzymes" name="Specify" version="1.0.0"> + <description>a set of restriction enzymes</description> + + <command interpreter="python"> + select_restriction_enzymes.py "--input=$input" "--output=$output" "--primers_loc=${GALAXY_DATA_INDEX_DIR}/gd.primers.loc" + #if $override_metadata.choice == "0": + "--scaffold_col=${input.metadata.scaffold}" "--pos_col=${input.metadata.pos}" "--species=${input.metadata.species}" + #else + "--scaffold_col=$scaf_col" "--pos_col=$pos_col" "--species=$species" + #end if + "--enzyme_list=$enzymes" + </command> + + <inputs> + <param format="tabular" name="input" type="data" label="Selected SNPS dataset"/> + <conditional name="override_metadata"> + <param name="choice" type="select" format="integer" label="choose columns"> + <option value="0" selected="true">No, get columns from metadata</option> + <option value="1" >Yes, choose columns</option> + </param> + <when value="0"> + <!-- no options --> + </when> + <when value="1"> + <param name="scaf_col" type="data_column" data_ref="input" numerical="false" label="Column with scaffold"/> + <param name="pos_col" type="data_column" data_ref="input" numerical="true" label="Column with position"/> + <param name="species" type="select" label="Choose species"> + <options from_file="gd.species.txt"> + <column name="name" index="1"/> + <column name="value" index="0"/> + </options> + </param> + </when> + </conditional> + + <param name="enzymes" type="select" display="checkboxes" multiple="true" label="Choose enzymes"> + <options from_file="gd.restriction_enzymes.txt"> + <column name="name" index="0"/> + <column name="value" index="1"/> + </options> + </param> + </inputs> + + <outputs> + <data format="wsf" name="output" metadata_source="input"/> + </outputs> + + <tests> + <test> + <param name="input" value="gd.sample.wsf" ftype="wsf"/> + 
<param name="choice" value="0"/> + <param name="enzymes" value="BanI,BstOI,Hsp92II"/> + <output name="output" file="gd.select_restriction_enzymes.wsf"/> + </test> + </tests> + + <help> +**What it does** + + It selects the SNPs that are differentially cut by at least one of the + specified restriction enzymes. The enzymes are required to cut the amplified + segment (for the specified PCR primers) only at the SNP. + +----- + +**Example** + +- input file:: + + chr2_75111355_75112576 314 A C L F chr2 75111676 C F 15 4 53 2 9 48 Y 96 0.369 0.355 0.396 0 + chr8_93901796_93905612 2471 A C A A chr8 93904264 A A 8 0 51 10 2 14 Y 961 0.016 0.534 0.114 2 + chr10_7434473_7435447 524 T C S S chr10 7435005 T S 11 5 90 14 0 69 Y 626 0.066 0.406 0.727 0 + chr14_80021455_80022064 138 G A H H chr14 80021593 G H 14 0 69 9 6 124 Y 377 0.118 0.997 0.195 1 + chr15_64470252_64471048 89 G A Y Y chr15 64470341 G Y 5 6 109 14 0 69 Y 312 0.247 0.998 0.393 0 + chr18_48070585_48071386 514 C T E K chr18 48071100 T K 7 7 46 14 0 69 Y 2 0.200 0.032 0.163 0 + chr18_50154905_50155664 304 A G Y C chr18 50155208 A Y 4 2 17 5 1 22 Y 8 0.022 0.996 0.128 0 + chr18_57379354_57380496 315 C T V V chr18 57379669 G V 11 0 60 9 6 62 Y 726 0.118 0.048 0.014 1 + chr19_14240610_14242055 232 C T A V chr19 14240840 C A 18 8 56 15 5 42 Y 73 0.003 0.153 0.835 0 + chr19_39866997_39874915 3117 C T P P chr19 39870110 C P 3 7 65 14 2 32 Y 6 0.321 0.911 0.462 4 + etc. + +- output file:: + + chr8_93901796_93905612 2471 A C A A chr8 93904264 A A 8 0 51 10 2 14 Y 961 0.016 0.534 0.114 2 + chr14_80021455_80022064 138 G A H H chr14 80021593 G H 14 0 69 9 6 124 Y 377 0.118 0.997 0.195 1 + chr18_57379354_57380496 315 C T V V chr18 57379669 G V 11 0 60 9 6 62 Y 726 0.118 0.048 0.014 1 + chr19_39866997_39874915 3117 C T P P chr19 39870110 C P 3 7 65 14 2 32 Y 6 0.321 0.911 0.462 4 + etc. + </help> +</tool>
#!/usr/bin/env python
"""select_snps.py -- pick roughly evenly spaced SNPs from a SNP table.

The SNPs of --input are ordered by reference chromosome and position, thinned
so that consecutive picks on a chromosome are at least
``total genome length // --num_snps`` apart, reduced further when more than
--num_snps remain, and written to --output in their original file order,
preceded by the input's comment lines.
"""

import math
import os
import sys
from optparse import OptionParser


def main_function(parse_arguments=None):
    """Return a decorator that turns ``f(options, arguments)`` into a script entry point.

    The decorated function accepts an optional argument list (default
    ``sys.argv``), passes it through ``parse_arguments`` and exits the
    interpreter with the wrapped function's return value.
    """
    if parse_arguments is None:
        parse_arguments = lambda arguments: (None, arguments)
    def main_decorator(to_decorate):
        def decorated_main(arguments=None):
            if arguments is None:
                arguments = sys.argv
            options, arguments = parse_arguments(arguments)
            sys.exit(to_decorate(options, arguments))
        return decorated_main
    return main_decorator


def parse_arguments(arguments):
    """Parse ``arguments[1:]`` and return optparse's ``(options, args)`` pair.

    All option values are kept as strings (or None when absent).
    """
    parser = OptionParser()
    parser.add_option('--input', dest='input')
    parser.add_option('--output', dest='output')
    parser.add_option('--index_dir', dest='index_dir')
    parser.add_option('--num_snps', dest='num_snps')
    parser.add_option('--ref_chrom_col', dest='ref_chrom_col')
    parser.add_option('--ref_pos_col', dest='ref_pos_col')
    parser.add_option('--ref_species', dest='ref_species')
    return parser.parse_args(arguments[1:])


@main_function(parse_arguments)
def main(options, arguments):
    """Validate the options, select the SNPs and write the output file."""
    # Column options are 1-based; a missing or non-numeric value becomes -1.
    ref_chrom_idx = to_int( options.ref_chrom_col ) - 1
    ref_pos_idx = to_int( options.ref_pos_col ) - 1

    if (ref_chrom_idx < 0) or (ref_pos_idx < 0) or (ref_chrom_idx == ref_pos_idx):
        sys.stderr.write("Cannot locate reference genome sequence (ref) or reference genome position (rPos) column for this dataset.\n")
        sys.exit(1)

    total_requested = to_int( options.num_snps )
    if total_requested < 1:
        # Zero would divide by zero below; negatives make no sense.
        sys.stderr.write("The number of SNPs to select (--num_snps) must be a positive integer.\n")
        sys.exit(1)

    # Imported after validation so that the pure helpers of this module can
    # be used (and option errors reported) without the project package.
    import genome_diversity as gd

    chrom_len_root = os.path.join( options.index_dir, 'shared/ucsc/chrom')
    chrom_len_file = '%s.len' % options.ref_species
    chrom_len_path = os.path.join(chrom_len_root, chrom_len_file)

    chrlens = gd.ChrLens( chrom_len_path )
    total_len = sum( chrlens.length(chrom) for chrom in chrlens )

    lines, data, comments = get_snp_lines_data_and_comments( options.input, ref_chrom_idx, ref_pos_idx )
    selected = select_snps( data, total_len, total_requested )
    out_data = fix_selection_and_order_like_input(data, selected, total_requested)
    write_selected_snps( options.output, out_data, lines, comments )


def to_int( value ):
    """Return ``int(value)``, or 0 when ``value`` is None or not a valid integer."""
    try:
        return int( value )
    except (TypeError, ValueError):
        return 0


def strip_chr_prefix( chrom ):
    """Return ``chrom`` without one leading 'chr', if present.

    ``str.lstrip('chr')`` is not used because it removes any run of the
    characters 'c', 'h' and 'r', which can make distinct names collide.
    """
    if chrom.startswith('chr'):
        return chrom[3:]
    return chrom


def get_snp_lines_data_and_comments( filename, chrom_idx, pos_idx ):
    """Read a tab-separated SNP table.

    Returns ``(lines, data, comments)``:

    * ``lines``    -- the accepted SNP lines, in file order, without line ends;
    * ``data``     -- one ``[chrom_sort, chrom, pos, line_num, line_idx]`` list
      per accepted line, sorted by ``(chrom_sort, pos)``; ``line_num`` is the
      1-based line number in the file and ``line_idx`` indexes ``lines``;
    * ``comments`` -- the lines starting with '#'.

    Empty lines and lines with too few columns are skipped.  A non-integer
    position is reported on stderr and terminates the program with status 1.
    """
    needed = max( chrom_idx, pos_idx ) + 1
    lines = []
    data = []
    comments = []
    line_num = 0
    fh = open( filename, 'r' )
    try:
        for line in fh:
            line_num += 1
            line = line.rstrip('\r\n')
            if not line:
                continue
            if line.startswith('#'):
                comments.append(line)
                continue
            elems = line.split('\t')
            if len(elems) < needed:
                continue
            chrom = elems[chrom_idx]
            try:
                pos = int(elems[pos_idx])
            except ValueError:
                sys.stderr.write( "bad reference position in line %d column %d: %s\n" % ( line_num, pos_idx+1, elems[pos_idx] ) )
                sys.exit(1)
            data.append( [strip_chr_prefix(chrom), chrom, pos, line_num, len(lines)] )
            lines.append(line)
    finally:
        fh.close()
    data.sort( key=lambda x: (x[0], x[2]) )
    return lines, data, comments


def select_snps( data, total_len, requested ):
    """Return the indexes into ``data`` of SNPs at least one spacing apart.

    The spacing is ``total_len // requested``.  ``data`` must already be
    grouped by chromosome and ordered by position.  On every chromosome the
    first SNP is taken, and the threshold for the next pick advances by one
    spacing after each pick.
    """
    # Floor division keeps the spacing an integer on Python 2 and 3 alike.
    space = total_len // requested
    old_chrom = None
    next_print = 0
    selected = []
    for data_idx, datum in enumerate( data ):
        chrom = datum[1]
        pos = datum[2]
        if chrom != old_chrom:
            old_chrom = chrom
            next_print = 0
        if pos >= next_print:
            selected.append(data_idx)
            next_print += space
    return selected


def fix_selection_and_order_like_input(data, selected, requested):
    """Return the selected ``data`` entries, sorted by input line number.

    When more than ``requested`` SNPs were selected, only those at ``requested``
    evenly spread ranks of ``selected`` are kept; otherwise all of them are.
    """
    total_selected = len( selected )

    if total_selected > requested:
        a = float( total_selected ) / requested
        b = a / 2
        # A set makes the membership test below O(1) per SNP.
        keep = set( int( math.ceil( i * a + b ) - 1 ) for i in range( requested ) )
        out_data = [ data[data_idx] for i, data_idx in enumerate(selected) if i in keep ]
    else:
        out_data = [ data[data_idx] for data_idx in selected ]

    out_data.sort( key=lambda x: x[3] )
    return out_data


def write_selected_snps( filename, data, lines, comments ):
    """Write ``comments`` and then the line of each ``data`` entry to ``filename``."""
    fh = open( filename, 'w' )
    try:
        for comment in comments:
            fh.write("%s\n" % comment )

        for datum in data:
            line_idx = datum[4]
            fh.write("%s\n" % lines[line_idx])
    finally:
        fh.close()

if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/select_snps.xml Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,87 @@ +<tool id="gd_select_snps" name="Select" version="1.0.0"> + <description>a specified number of SNPs</description> + + <command interpreter="python"> + select_snps.py "--input=$input" "--output=$output" "--index_dir=$GALAXY_DATA_INDEX_DIR" "--num_snps=$num_snps" + #if $override_metadata.choice == "0": + "--ref_chrom_col=${input.metadata.ref}" "--ref_pos_col=${input.metadata.rPos}" "--ref_species=${input.metadata.dbkey}" + #else + "--ref_chrom_col=$ref_col" "--ref_pos_col=$rpos_col" "--ref_species=$ref_species" + #end if + </command> + + <inputs> + <param format="tabular" name="input" type="data" label="Selected SNPS dataset"> + <validator type="unspecified_build" message="This dataset does not have a reference species and cannot be used with this tool" /> + </param> + <param name="num_snps" type="integer" value="10" optional="false" min="1" label="Number of SNPs"/> + <conditional name="override_metadata"> + <param name="choice" type="select" format="integer" label="choose columns"> + <option value="0" selected="true">No, get columns from metadata</option> + <option value="1" >Yes, choose columns</option> + </param> + <when value="0" /> + <when value="1"> + <param name="ref_col" type="data_column" data_ref="input" numerical="false" label="Column with reference chromosome"/> + <param name="rpos_col" type="data_column" data_ref="input" numerical="true" label="Column with reference position"/> + <param name="ref_species" type="select" label="Choose reference species"> + <options from_file="gd.ref_species.txt"> + <column name="name" index="1"/> + <column name="value" index="0"/> + </options> + </param> + </when> + </conditional> + </inputs> + + <outputs> + <data format="wsf" name="output" metadata_source="input"/> + </outputs> + + <tests> + <test> + <param name="input" value="genome_diversity/test_in/sample.wsf" ftype="wsf"/> + <param name="num_snps" 
value="100"/> + <param name="choice" value="0"/> + <output name="output" file="genome_diversity/test_out/select_snps/select_snps.wsf" /> + </test> + </tests> + + + <help> +**What it does** + + It attempts to select a specified number of SNPs from the dataset, making them + approximately uniformly spaced relative to the reference genome. The number + actually selected may be slightly more than the specified number. + +----- + +**Example** + +- input file:: + + chr2_75111355_75112576 314 A C L F chr2 75111676 C F 15 4 53 2 9 48 Y 96 0.369 0.355 0.396 0 + chr8_93901796_93905612 2471 A C A A chr8 93904264 A A 8 0 51 10 2 14 Y 961 0.016 0.534 0.114 2 + chr10_7434473_7435447 524 T C S S chr10 7435005 T S 11 5 90 14 0 69 Y 626 0.066 0.406 0.727 0 + chr14_80021455_80022064 138 G A H H chr14 80021593 G H 14 0 69 9 6 124 Y 377 0.118 0.997 0.195 1 + chr15_64470252_64471048 89 G A Y Y chr15 64470341 G Y 5 6 109 14 0 69 Y 312 0.247 0.998 0.393 0 + chr18_48070585_48071386 514 C T E K chr18 48071100 T K 7 7 46 14 0 69 Y 2 0.200 0.032 0.163 0 + chr18_50154905_50155664 304 A G Y C chr18 50155208 A Y 4 2 17 5 1 22 Y 8 0.022 0.996 0.128 0 + chr18_57379354_57380496 315 C T V V chr18 57379669 G V 11 0 60 9 6 62 Y 726 0.118 0.048 0.014 1 + chr19_14240610_14242055 232 C T A V chr19 14240840 C A 18 8 56 15 5 42 Y 73 0.003 0.153 0.835 0 + chr19_39866997_39874915 3117 C T P P chr19 39870110 C P 3 7 65 14 2 32 Y 6 0.321 0.911 0.462 4 + etc. 
+ +- output file:: + + chr2_75111355_75112576 314 A C L F chr2 75111676 C F 15 4 53 2 9 48 Y 96 0.369 0.355 0.396 0 + chr8_93901796_93905612 2471 A C A A chr8 93904264 A A 8 0 51 10 2 14 Y 961 0.016 0.534 0.114 2 + chr10_7434473_7435447 524 T C S S chr10 7435005 T S 11 5 90 14 0 69 Y 626 0.066 0.406 0.727 0 + chr14_80021455_80022064 138 G A H H chr14 80021593 G H 14 0 69 9 6 124 Y 377 0.118 0.997 0.195 1 + chr15_64470252_64471048 89 G A Y Y chr15 64470341 G Y 5 6 109 14 0 69 Y 312 0.247 0.998 0.393 0 + chr18_48070585_48071386 514 C T E K chr18 48071100 T K 7 7 46 14 0 69 Y 2 0.200 0.032 0.163 0 + chr19_14240610_14242055 232 C T A V chr19 14240840 C A 18 8 56 15 5 42 Y 73 0.003 0.153 0.835 0 + etc. + </help> +</tool>
#!/usr/bin/env python2.5
"""specify_restriction_enzymes.py -- keep the SNPs that a chosen enzyme can cut.

Every SNP line of --input is copied to --output when the enzymes reported by
the primers file for that SNP's (scaffold, position) include at least one of
the enzymes named in --enzyme_list.  The input's comment lines are written
once, just before the first selected SNP.

The syntax is kept compatible with Python 2.5 (see the shebang), hence
``sys.exc_info()`` and ``try``/``finally`` rather than ``except ... as`` and
``with``.
"""

import os
import sys
import traceback
from optparse import OptionParser

# (options attribute, command-line flag) for every mandatory option, in the
# order in which they are checked.
REQUIRED_OPTIONS = (
    ( 'input', '--input' ),
    ( 'output', '--output' ),
    ( 'primers_loc', '--primers_loc' ),
    ( 'scaffold_col', '--scaffold_col' ),
    ( 'pos_col', '--pos_col' ),
    ( 'enzyme_list_string', '--enzyme_list' ),
    ( 'species', '--species' ),
)


def main_function( parse_arguments=None ):
    """Return a decorator that turns ``f(options, arguments)`` into a script entry point.

    The decorated function accepts an optional argument list (default
    ``sys.argv``), passes it through ``parse_arguments`` and always terminates
    the interpreter with ``sys.exit``: with the wrapped function's return
    value on success, or with 1 after printing the error and its traceback to
    stderr when the wrapped function raises.
    """
    if parse_arguments is None:
        parse_arguments = lambda arguments: ( None, arguments )
    def main_decorator( to_decorate ):
        def decorated_main( arguments=None ):
            if arguments is None:
                arguments = sys.argv
            options, arguments = parse_arguments( arguments )
            rc = 1
            try:
                rc = to_decorate( options, arguments )
            except Exception:
                err = sys.exc_info()[1]
                sys.stderr.write( 'ERROR: %s\n' % str( err ) )
                traceback.print_exc()
            finally:
                sys.exit( rc )
        return decorated_main
    return main_decorator


def parse_arguments( arguments ):
    """Parse ``arguments[1:]`` and return optparse's ``(options, args)`` pair."""
    parser = OptionParser()
    parser.add_option('--input',
                        type='string', dest='input',
                        help='file of selected SNPs')
    parser.add_option('--output',
                        type='string', dest='output',
                        help='output file')
    parser.add_option('--primers_loc',
                        type='string', dest='primers_loc',
                        help='primers .loc file')
    parser.add_option('--scaffold_col',
                        type="int", dest='scaffold_col',
                        help='scaffold column in the input file')
    parser.add_option('--pos_col',
                        type="int", dest='pos_col',
                        help='position column in the input file')
    parser.add_option('--enzyme_list',
                        type="string", dest='enzyme_list_string',
                        help='comma separated list of enzymes')
    parser.add_option('--species',
                        type="string", dest='species',
                        help='species')
    return parser.parse_args( arguments[1:] )


def parse_enzyme_list( enzyme_list_string ):
    """Return the set of non-empty, whitespace-stripped names in a comma-separated list."""
    names = [ name.strip() for name in enzyme_list_string.split( ',' ) ]
    return set( [ name for name in names if name ] )


@main_function( parse_arguments )
def main( options, arguments ):
    """Copy the SNPs cut by a requested enzyme from --input to --output.

    Raises:
        RuntimeError: if a mandatory option is missing (or has a false value
            such as 0 or the empty string).
    """
    for attribute, flag in REQUIRED_OPTIONS:
        if not getattr( options, attribute ):
            raise RuntimeError( 'missing %s option' % flag )

    # Imported after validation so that argument errors are reported even
    # when the project package is unavailable.
    import genome_diversity as gd

    snps = gd.SnpFile( filename=options.input, seq_col=int( options.scaffold_col ), pos_col=int( options.pos_col ) )

    out_fh = gd._openfile( options.output, 'w' )
    try:
        wanted = parse_enzyme_list( options.enzyme_list_string )

        primer_data_file = gd.get_filename_from_loc( options.species, options.primers_loc )
        file_root, file_ext = os.path.splitext( primer_data_file )
        # The index is expected next to the data file, with a .cdb extension.
        primer_index_file = file_root + ".cdb"
        primers = gd.PrimersFile( data_file=primer_data_file, index_file=primer_index_file )

        comments_printed = False

        while snps.next():
            seq, pos = snps.get_seq_pos()
            enzyme_list = primers.get_enzymes( seq, pos )
            if not any( enzyme in wanted for enzyme in enzyme_list ):
                continue
            if not comments_printed:
                # Comments are emitted lazily, only once a SNP is selected.
                for comment in snps.comments:
                    out_fh.write( "%s\n" % comment )
                comments_printed = True
            out_fh.write( "%s\n" % snps.line )
    finally:
        out_fh.close()

if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/specify_restriction_enzymes.xml Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,88 @@ +<tool id="gd_specify_restriction_enzymes" name="Specify" version="1.0.0"> + <description>a set of restriction enzymes</description> + + <command interpreter="python"> + specify_restriction_enzymes.py "--input=$input" "--output=$output" "--primers_loc=${GALAXY_DATA_INDEX_DIR}/gd.primers.loc" + #if $override_metadata.choice == "0": + "--scaffold_col=${input.metadata.scaffold}" "--pos_col=${input.metadata.pos}" "--species=${input.metadata.species}" + #else + "--scaffold_col=$scaf_col" "--pos_col=$pos_col" "--species=$species" + #end if + "--enzyme_list=$enzymes" + </command> + + <inputs> + <param format="tabular" name="input" type="data" label="Selected SNPS dataset"/> + <conditional name="override_metadata"> + <param name="choice" type="select" format="integer" label="choose columns"> + <option value="0" selected="true">No, get columns from metadata</option> + <option value="1" >Yes, choose columns</option> + </param> + <when value="0" /> + <when value="1"> + <param name="scaf_col" type="data_column" data_ref="input" numerical="false" label="Column with scaffold"/> + <param name="pos_col" type="data_column" data_ref="input" numerical="true" label="Column with position"/> + <param name="species" type="select" label="Choose species"> + <options from_file="gd.species.txt"> + <column name="name" index="1"/> + <column name="value" index="0"/> + </options> + </param> + </when> + </conditional> + + <param name="enzymes" type="select" display="checkboxes" multiple="true" label="Choose enzymes"> + <options from_file="gd.restriction_enzymes.txt"> + <column name="name" index="0"/> + <column name="value" index="1"/> + </options> + </param> + </inputs> + + <outputs> + <data format="wsf" name="output" metadata_source="input"/> + </outputs> + + <tests> + <test> + <param name="input" value="genome_diversity/test_out/select_snps/select_snps.wsf" 
ftype="wsf" /> + <param name="choice" value="0" /> + <param name="enzymes" value="Bsp1286I,HaeII,RsaI" /> + <output name="output" file="genome_diversity/test_out/specify_restriction_enzymes/specify_restriction_enzymes.wsf" /> + </test> + </tests> + + <help> +**What it does** + + It selects the SNPs that are differentially cut by at least one of the + specified restriction enzymes. The enzymes are required to cut the amplified + segment (for the specified PCR primers) only at the SNP. + +----- + +**Example** + +- input file:: + + chr2_75111355_75112576 314 A C L F chr2 75111676 C F 15 4 53 2 9 48 Y 96 0.369 0.355 0.396 0 + chr8_93901796_93905612 2471 A C A A chr8 93904264 A A 8 0 51 10 2 14 Y 961 0.016 0.534 0.114 2 + chr10_7434473_7435447 524 T C S S chr10 7435005 T S 11 5 90 14 0 69 Y 626 0.066 0.406 0.727 0 + chr14_80021455_80022064 138 G A H H chr14 80021593 G H 14 0 69 9 6 124 Y 377 0.118 0.997 0.195 1 + chr15_64470252_64471048 89 G A Y Y chr15 64470341 G Y 5 6 109 14 0 69 Y 312 0.247 0.998 0.393 0 + chr18_48070585_48071386 514 C T E K chr18 48071100 T K 7 7 46 14 0 69 Y 2 0.200 0.032 0.163 0 + chr18_50154905_50155664 304 A G Y C chr18 50155208 A Y 4 2 17 5 1 22 Y 8 0.022 0.996 0.128 0 + chr18_57379354_57380496 315 C T V V chr18 57379669 G V 11 0 60 9 6 62 Y 726 0.118 0.048 0.014 1 + chr19_14240610_14242055 232 C T A V chr19 14240840 C A 18 8 56 15 5 42 Y 73 0.003 0.153 0.835 0 + chr19_39866997_39874915 3117 C T P P chr19 39870110 C P 3 7 65 14 2 32 Y 6 0.321 0.911 0.462 4 + etc. + +- output file:: + + chr8_93901796_93905612 2471 A C A A chr8 93904264 A A 8 0 51 10 2 14 Y 961 0.016 0.534 0.114 2 + chr14_80021455_80022064 138 G A H H chr14 80021593 G H 14 0 69 9 6 124 Y 377 0.118 0.997 0.195 1 + chr18_57379354_57380496 315 C T V V chr18 57379669 G V 11 0 60 9 6 62 Y 726 0.118 0.048 0.014 1 + chr19_39866997_39874915 3117 C T P P chr19 39870110 C P 3 7 65 14 2 32 Y 6 0.321 0.911 0.462 4 + etc. + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_in/a.ind Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,2 @@ +9 PB1 +13 PB2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_in/b.ind Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,2 @@ +17 PB3 +21 PB4
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_in/c.ind Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,2 @@ +25 PB6 +29 PB8
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_in/ensembl.tabular Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,150 @@ +ENSCAFT00000000001 +ENSCAFT00000000144 +ENSCAFT00000000160 +ENSCAFT00000000215 +ENSCAFT00000000233 +ENSCAFT00000000365 +ENSCAFT00000000507 +ENSCAFT00000000517 +ENSCAFT00000000674 +ENSCAFT00000000724 +ENSCAFT00000000760 +ENSCAFT00000000762 +ENSCAFT00000001047 +ENSCAFT00000001052 +ENSCAFT00000001063 +ENSCAFT00000001076 +ENSCAFT00000001104 +ENSCAFT00000001141 +ENSCAFT00000001146 +ENSCAFT00000001204 +ENSCAFT00000001219 +ENSCAFT00000001250 +ENSCAFT00000001352 +ENSCAFT00000001363 +ENSCAFT00000001421 +ENSCAFT00000001523 +ENSCAFT00000001575 +ENSCAFT00000001587 +ENSCAFT00000001597 +ENSCAFT00000002056 +ENSCAFT00000002100 +ENSCAFT00000002110 +ENSCAFT00000002175 +ENSCAFT00000002259 +ENSCAFT00000002460 +ENSCAFT00000002537 +ENSCAFT00000002577 +ENSCAFT00000002578 +ENSCAFT00000002660 +ENSCAFT00000002792 +ENSCAFT00000002849 +ENSCAFT00000002999 +ENSCAFT00000003163 +ENSCAFT00000003223 +ENSCAFT00000003307 +ENSCAFT00000003515 +ENSCAFT00000003560 +ENSCAFT00000003644 +ENSCAFT00000003824 +ENSCAFT00000003840 +ENSCAFT00000004092 +ENSCAFT00000004103 +ENSCAFT00000004208 +ENSCAFT00000004253 +ENSCAFT00000004311 +ENSCAFT00000004464 +ENSCAFT00000004511 +ENSCAFT00000004609 +ENSCAFT00000004673 +ENSCAFT00000004726 +ENSCAFT00000004799 +ENSCAFT00000004933 +ENSCAFT00000004993 +ENSCAFT00000005126 +ENSCAFT00000005142 +ENSCAFT00000005225 +ENSCAFT00000005323 +ENSCAFT00000005467 +ENSCAFT00000005496 +ENSCAFT00000005518 +ENSCAFT00000005653 +ENSCAFT00000005746 +ENSCAFT00000005749 +ENSCAFT00000005832 +ENSCAFT00000005972 +ENSCAFT00000006025 +ENSCAFT00000006114 +ENSCAFT00000006157 +ENSCAFT00000006219 +ENSCAFT00000006272 +ENSCAFT00000006453 +ENSCAFT00000006479 +ENSCAFT00000006507 +ENSCAFT00000006669 +ENSCAFT00000006689 +ENSCAFT00000006827 +ENSCAFT00000006891 +ENSCAFT00000007130 +ENSCAFT00000007145 +ENSCAFT00000007244 +ENSCAFT00000007375 +ENSCAFT00000007440 +ENSCAFT00000007467 
+ENSCAFT00000007484 +ENSCAFT00000007527 +ENSCAFT00000007553 +ENSCAFT00000007697 +ENSCAFT00000007703 +ENSCAFT00000007747 +ENSCAFT00000007774 +ENSCAFT00000007776 +ENSCAFT00000007779 +ENSCAFT00000007859 +ENSCAFT00000007951 +ENSCAFT00000007959 +ENSCAFT00000008012 +ENSCAFT00000008063 +ENSCAFT00000008142 +ENSCAFT00000008198 +ENSCAFT00000008413 +ENSCAFT00000008540 +ENSCAFT00000008586 +ENSCAFT00000008588 +ENSCAFT00000008673 +ENSCAFT00000008678 +ENSCAFT00000008728 +ENSCAFT00000008769 +ENSCAFT00000008831 +ENSCAFT00000009074 +ENSCAFT00000009114 +ENSCAFT00000009614 +ENSCAFT00000009698 +ENSCAFT00000009710 +ENSCAFT00000010094 +ENSCAFT00000010141 +ENSCAFT00000010439 +ENSCAFT00000010496 +ENSCAFT00000010516 +ENSCAFT00000010531 +ENSCAFT00000010559 +ENSCAFT00000010593 +ENSCAFT00000010616 +ENSCAFT00000010630 +ENSCAFT00000010829 +ENSCAFT00000010865 +ENSCAFT00000010931 +ENSCAFT00000010977 +ENSCAFT00000010988 +ENSCAFT00000011187 +ENSCAFT00000011380 +ENSCAFT00000011397 +ENSCAFT00000011721 +ENSCAFT00000011730 +ENSCAFT00000011771 +ENSCAFT00000011789 +ENSCAFT00000011968 +ENSCAFT00000012081 +ENSCAFT00000012133 +ENSCAFT00000012159 +ENSCAFT00000012254
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_in/sample.wpf Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,401 @@ +#{"column_names":["contig","pos","ref","rPos","trns","pep","AA1","loc","AA2","KEGG","pred","path"],"pos":2,"rPos":4,"ref":3,"dbkey":"canFam2","scaffold":1,"species":"bear","kegg_gene":10,"kegg_path":12} +Contig39_chr1_3261104_3261850 414 chr1 3261546 ENSCAFT00000000001 ENSCAFP00000000001 S 667 F 476153 probably damaging cfa00230=Purine metabolism.cfa00500=Starch and sucrose metabolism.cfa00740=Riboflavin metabolism.cfa00760=Nicotinate and nicotinamide metabolism.cfa00770=Pantothenate and CoA biosynthesis.cfa01100=Metabolic pathways +Contig62_chr1_19011969_19012646 265 chr1 19012240 ENSCAFT00000000144 ENSCAFP00000000125 * 161 R 483960 probably damaging N +Contig36_chr1_20102654_20103213 365 chr1 20103029 ENSCAFT00000000160 ENSCAFP00000000140 R 407 Q 610160 possibly damaging N +Contig136_chr10_3710404_3714591 3079 chr10 3713499 ENSCAFT00000000215 ENSCAFP00000000194 T 103 P U benign N +Contig36_chr1_23682012_23682647 374 chr1 23682388 ENSCAFT00000000233 ENSCAFP00000000210 N 234 S 483973 benign N +Contig163_chr10_4573526_4574494 487 chr10 4574010 ENSCAFT00000000365 ENSCAFP00000000332 R 186 K 474414 benign cfa00450=Selenocompound metabolism.cfa00970=Aminoacyl-tRNA biosynthesis +Contig55_chr1_40056604_40059808 2081 chr1 40058686 ENSCAFT00000000507 ENSCAFP00000000458 I 247 K 484023 possibly damaging N +Contig17_chr1_40203628_40205630 1417 chr1 40205044 ENSCAFT00000000517 ENSCAFP00000000468 N 109 S 476233 benign N +Contig97_chr1_44847984_44848380 285 chr1 44848272 ENSCAFT00000000674 ENSCAFP00000000618 Q 27 R 611986 benign N +Contig214_chr10_16106753_16106969 121 chr10 16106873 ENSCAFT00000000724 ENSCAFP00000000668 A 301 T 609478 benign N +Contig75_chr1_45731970_45732932 436 chr1 45732397 ENSCAFT00000000760 ENSCAFP00000000701 I 490 V U benign N +Contig33_chr1_45614845_45617413 1835 chr1 45616685 ENSCAFT00000000760 ENSCAFP00000000701 A 4390 V U 
benign N +Contig95_chr10_18829724_18831056 914 chr10 18830645 ENSCAFT00000000762 ENSCAFP00000000703 A 512 V U possibly damaging N +Contig197_chr13_8622062_8623071 606 chr13 8622665 ENSCAFT00000001047 ENSCAFP00000000959 T 406 I 475067 possibly damaging cfa00240=Pyrimidine metabolism.cfa00410=beta-Alanine metabolism.cfa00770=Pantothenate and CoA biosynthesis.cfa00983=Drug metabolism - other enzymes.cfa01100=Metabolic pathways +Contig243_chr10_19959210_19960069 701 chr10 19959858 ENSCAFT00000001052 ENSCAFP00000000964 E 1345 K U benign N +Contig137_chr13_10622950_10624043 1039 chr13 10623979 ENSCAFT00000001063 ENSCAFP00000000975 E 10 K 481999 benign N +Contig137_chr13_10622950_10624043 1006 chr13 10623946 ENSCAFT00000001063 ENSCAFP00000000975 R 21 C 481999 probably damaging N +Contig115_chr12_4411478_4412322 124 chr12 4411614 ENSCAFT00000001076 ENSCAFP00000000986 R 177 H U benign N +Contig150_chr12_4438230_4439944 385 chr12 4438614 ENSCAFT00000001104 ENSCAFP00000001014 Y 277 D 607591 benign N +Contig84_chr1_52076858_52077103 80 chr1 52076943 ENSCAFT00000001141 ENSCAFP00000001046 C 147 Y 484064 benign N +Contig29_chr13_13215547_13217183 793 chr13 13216352 ENSCAFT00000001146 ENSCAFP00000001050 P 1 R 475076 probably damaging N +Contig251_chr10_22876556_22877097 152 chr10 22876714 ENSCAFT00000001204 ENSCAFP00000001103 E 1162 D 481203 benign N +Contig21_chr10_22964856_22965302 202 chr10 22965058 ENSCAFT00000001219 ENSCAFP00000001115 P 6 Q 474465 benign N +Contig199_chr12_5083018_5084534 453 chr12 5083472 ENSCAFT00000001250 ENSCAFP00000001144 I 185 T 481729.481731 benign N.cfa04145=Phagosome.cfa04514=Cell adhesion molecules (CAMs).cfa04612=Antigen processing and presentation.cfa04672=Intestinal immune network for IgA production.cfa04940=Type I diabetes mellitus.cfa05140=Leishmaniasis.cfa05145=Toxoplasmosis.cfa05150=Staphylococcus aureus infection.cfa05152=Tuberculosis.cfa05164=Influenza A.cfa05166=HTLV-I infection.cfa05168=Herpes simplex 
infection.cfa05310=Asthma.cfa05320=Autoimmune thyroid disease.cfa05322=Systemic lupus erythematosus.cfa05323=Rheumatoid arthritis.cfa05330=Allograft rejection.cfa05332=Graft-versus-host disease.cfa05416=Viral myocarditis +Contig41_chr13_21629998_21630487 161 chr13 21630157 ENSCAFT00000001352 ENSCAFP00000001239 P 729 S 482026 possibly damaging cfa00565=Ether lipid metabolism +Contig16_chr13_21786766_21788016 169 chr13 21786927 ENSCAFT00000001363 ENSCAFP00000001249 V 1142 A 475084 benign cfa03022=Basal transcription factors +Contig60_chr1_60333035_60333884 731 chr1 60333755 ENSCAFT00000001421 ENSCAFP00000001307 V 400 I 484096 benign N +Contig44_chr13_24555640_24556298 499 chr13 24556139 ENSCAFT00000001523 ENSCAFP00000001400 N 660 S 475088 benign N +Contig153_chr12_5955114_5958935 2950 chr12 5958094 ENSCAFT00000001575 ENSCAFP00000001449 E 13 D 481744 benign cfa04141=Protein processing in endoplasmic reticulum +Contig146_chr13_25076435_25077249 723 chr13 25077165 ENSCAFT00000001587 ENSCAFP00000001461 T 9 S 482035 benign N +Contig81_chr13_25579918_25582207 874 chr13 25580772 ENSCAFT00000001597 ENSCAFP00000001469 E 62 G 609411 benign N +Contig159_chr10_28604683_28606028 753 chr10 28605433 ENSCAFT00000002056 ENSCAFP00000001903 S 79 P 610014 benign N +Contig30_chr11_29945215_29949829 3973 chr11 29949181 ENSCAFT00000002100 ENSCAFP00000001944 M 282 T U benign N +Contig102_chr10_29039231_29041280 829 chr10 29040065 ENSCAFT00000002110 ENSCAFP00000001953 R 311 Q 481249 unknown N +Contig187_chr1_78583588_78584279 250 chr1 78583839 ENSCAFT00000002175 ENSCAFP00000002014 K 176 R 476310 benign N +Contig199_chr1_79234891_79237527 384 chr1 79235278 ENSCAFT00000002259 ENSCAFP00000002095 V 403 A 484151 benign N +Contig119_chr12_12212738_12214663 1005 chr12 12213720 ENSCAFT00000002460 ENSCAFP00000002280 R 749 Q 481785 possibly damaging N +Contig119_chr12_12212738_12214663 918 chr12 12213633 ENSCAFT00000002460 ENSCAFP00000002280 R 778 Q 481785 benign N +Contig39_chr14_10730123_10732539 
335 chr14 10730462 ENSCAFT00000002537 ENSCAFP00000002356 V 1179 E U benign N +Contig41_chr1_84886710_84894794 3494 chr1 84890207 ENSCAFT00000002577 ENSCAFP00000002394 E 1089 K 484157 possibly damaging N +Contig182_chr12_13881114_13883427 1690 chr12 13882828 ENSCAFT00000002578 ENSCAFP00000002395 S 99 G 608906 benign N +Contig34_chr11_48151988_48152712 198 chr11 48152205 ENSCAFT00000002660 ENSCAFP00000002468 C 587 R U possibly damaging N +Contig37_chr10_34118256_34119269 437 chr10 34118687 ENSCAFT00000002792 ENSCAFP00000002588 A 377 T 474523 benign N +Contig21_chr14_16091274_16093278 716 chr14 16091997 ENSCAFT00000002849 ENSCAFP00000002642 R 126 C 475216 probably damaging N +Contig57_chr1_90983602_90984717 559 chr1 90984158 ENSCAFT00000002999 ENSCAFP00000002781 A 226 V U benign N +Contig45_chr12_15798569_15798849 141 chr12 15798709 ENSCAFT00000003163 ENSCAFP00000002938 N 342 S 474921 benign cfa03040=Spliceosome +Contig83_chr12_17852905_17859596 2392 chr12 17855305 ENSCAFT00000003223 ENSCAFP00000002995 E 770 Q 474925 benign N +Contig41_chr12_18725392_18725889 169 chr12 18725560 ENSCAFT00000003307 ENSCAFP00000003070 R 80 Q 609995 benign N +Contig9_chr14_26125779_26127414 486 chr14 26126264 ENSCAFT00000003515 ENSCAFP00000003259 P 123 T 482316 benign N +Contig132_chr1_101565951_101566612 255 chr1 101566210 ENSCAFT00000003560 ENSCAFP00000003298 L 588 F U unknown N +Contig142_chr1_102093954_102094392 121 chr1 102094072 ENSCAFT00000003644 ENSCAFP00000003373 K 120 E 484216 benign cfa00290=Valine, leucine and isoleucine biosynthesis.cfa00970=Aminoacyl-tRNA biosynthesis +Contig129_chr14_34071666_34074617 2313 chr14 34073957 ENSCAFT00000003824 ENSCAFP00000003537 T 282 I 475249 probably damaging N +Contig147_chr14_34262125_34262938 340 chr14 34262468 ENSCAFT00000003840 ENSCAFP00000003553 I 70 V 482333 benign N +Contig52_chr12_36031985_36035244 1237 chr12 36033208 ENSCAFT00000004092 ENSCAFP00000003784 Y 564 H 474960 benign N +Contig176_chr1_105494865_105495258 119 chr1 105494995 
ENSCAFT00000004103 ENSCAFP00000003793 A 406 V 484298 benign N +Contig60_chr11_63130652_63131816 702 chr11 63131349 ENSCAFT00000004208 ENSCAFP00000003892 V 260 I 481637 benign N +Contig9_chr10_53579958_53582510 688 chr10 53580646 ENSCAFT00000004253 ENSCAFP00000003937 S 191 G 100534006.100534007.474588 benign N +Contig93_chr14_38451661_38452163 221 chr14 38451882 ENSCAFT00000004311 ENSCAFP00000003990 A 420 V 482346 benign N +Contig70_chr12_42859511_42860010 180 chr12 42859693 ENSCAFT00000004464 ENSCAFP00000004126 P 7 S 481892 possibly damaging N +Contig28_chr12_43447144_43449156 1136 chr12 43448279 ENSCAFT00000004511 ENSCAFP00000004169 V 582 M 481893 benign N +Contig18_chr13_62535238_62535697 227 chr13 62535471 ENSCAFT00000004609 ENSCAFP00000004263 E 277 D 611755 benign N +Contig282_chr1_108960925_108962235 205 chr1 108961141 ENSCAFT00000004673 ENSCAFP00000004325 A 149 V 611817 benign N +Contig110_chr1_109196028_109197290 987 chr1 109197021 ENSCAFT00000004726 ENSCAFP00000004374 E 330 D 610047 benign cfa00230=Purine metabolism.cfa00240=Pyrimidine metabolism.cfa01100=Metabolic pathways.cfa03030=DNA replication.cfa03410=Base excision repair.cfa03420=Nucleotide excision repair.cfa03430=Mismatch repair.cfa03440=Homologous recombination.cfa05166=HTLV-I infection +Contig89_chr11_69097905_69099099 568 chr11 69098443 ENSCAFT00000004799 ENSCAFP00000004445 E 1317 G U benign N +Contig118_chr14_46155051_46155557 173 chr14 46155218 ENSCAFT00000004933 ENSCAFP00000004572 S 110 L 482382 benign cfa04621=NOD-like receptor signaling pathway.cfa05133=Pertussis +Contig54_chr12_51910786_51912716 682 chr12 51911460 ENSCAFT00000004993 ENSCAFP00000004630 H 2889 Y 474995 benign cfa03008=Ribosome biogenesis in eukaryotes +Contig95_chr10_67698730_67699605 267 chr10 67698997 ENSCAFT00000005126 ENSCAFP00000004751 P 45 L U benign N +Contig265_chr17_3177908_3178389 332 chr17 3178241 ENSCAFT00000005142 ENSCAFP00000004763 A 306 P 606804 benign N +Contig322_chr17_4977962_4979371 1122 chr17 4979079 
ENSCAFT00000005225 ENSCAFP00000004836 T 319 I 475647 possibly damaging N +Contig48_chr11_71453437_71456331 1725 chr11 71455160 ENSCAFT00000005323 ENSCAFP00000004927 A 226 V U benign N +Contig51_chr16_4789440_4790118 484 chr16 4789915 ENSCAFT00000005467 ENSCAFP00000005065 Q 318 H U benign N +Contig32_chr12_57224809_57225619 146 chr12 57224960 ENSCAFT00000005496 ENSCAFP00000005093 A 273 T 481925 benign N +Contig6_chr14_59310933_59312532 615 chr14 59311551 ENSCAFT00000005518 ENSCAFP00000005112 Y 304 H 492302 probably damaging cfa02010=ABC transporters.cfa04971=Gastric acid secretion.cfa04972=Pancreatic secretion.cfa04976=Bile secretion +Contig89_chr11_74391566_74395656 2856 chr11 74394408 ENSCAFT00000005653 ENSCAFP00000031395 R 450 H 403417 benign cfa04145=Phagosome.cfa04620=Toll-like receptor signaling pathway.cfa05132=Salmonella infection.cfa05133=Pertussis.cfa05134=Legionellosis.cfa05140=Leishmaniasis.cfa05142=Chagas disease (American trypanosomiasis).cfa05144=Malaria.cfa05145=Toxoplasmosis.cfa05146=Amoebiasis.cfa05152=Tuberculosis.cfa05162=Measles.cfa05164=Influenza A.cfa05323=Rheumatoid arthritis +Contig15_chr1_109713951_109714808 645 chr1 109714594 ENSCAFT00000005746 ENSCAFP00000005319 R 783 K 476410 benign cfa00071=Fatty acid metabolism.cfa03320=PPAR signaling pathway.cfa04920=Adipocytokine signaling pathway +Contig47_chr17_11258085_11259619 360 chr17 11258455 ENSCAFT00000005749 ENSCAFP00000005322 V 778 L 610007 benign N +Contig1_chr19_4352123_4352541 311 chr19 4352427 ENSCAFT00000005832 ENSCAFP00000005401 H 7 Y 403584 benign cfa04060=Cytokine-cytokine receptor interaction.cfa04630=Jak-STAT signaling pathway.cfa04672=Intestinal immune network for IgA production.cfa05166=HTLV-I infection.cfa05168=Herpes simplex infection.cfa05323=Rheumatoid arthritis +Contig57_chr12_66915864_66916357 337 chr12 66916199 ENSCAFT00000005972 ENSCAFP00000005534 F 1242 L 475012 benign N +Contig36_chr17_16182220_16182772 282 chr17 16182494 ENSCAFT00000006025 ENSCAFP00000005583 V 13 I 
482980 possibly damaging N +Contig64_chr19_15052202_15053292 240 chr19 15052443 ENSCAFT00000006114 ENSCAFP00000005658 I 175 V 483829 benign N +Contig169_chr12_69415779_69417261 1136 chr12 69416908 ENSCAFT00000006157 ENSCAFP00000005701 D 85 N 475021 possibly damaging N +Contig200_chr18_15803806_15804082 169 chr18 15803976 ENSCAFT00000006219 ENSCAFP00000005760 A 66 V 483261 benign cfa04972=Pancreatic secretion.cfa04978=Mineral absorption +Contig6_chr18_15814044_15814404 97 chr18 15814150 ENSCAFT00000006219 ENSCAFP00000005760 A 413 S 483261 benign cfa04972=Pancreatic secretion.cfa04978=Mineral absorption +Contig104_chr1_110433641_110434230 183 chr1 110433810 ENSCAFT00000006272 ENSCAFP00000005811 A 315 T 484394 benign cfa00280=Valine, leucine and isoleucine degradation.cfa00290=Valine, leucine and isoleucine biosynthesis.cfa00770=Pantothenate and CoA biosynthesis.cfa01100=Metabolic pathways +Contig52_chr18_17851226_17851871 284 chr18 17851509 ENSCAFT00000006453 ENSCAFP00000005976 T 311 M 475893 probably damaging N +Contig63_chr16_12167721_12168304 388 chr16 12168099 ENSCAFT00000006479 ENSCAFP00000006000 M 634 V U benign N +Contig101_chr20_4702659_4703738 441 chr20 4703092 ENSCAFT00000006507 ENSCAFP00000006027 G 635 D 484622 probably damaging cfa03030=DNA replication.cfa04110=Cell cycle +Contig53_chr19_21456428_21457881 408 chr19 21456840 ENSCAFT00000006669 ENSCAFP00000006174 R 247 L 476094 possibly damaging N +Contig58_chr18_19883250_19884312 250 chr18 19883498 ENSCAFT00000006689 ENSCAFP00000006194 * 503 Y 475897 benign N +Contig122_chr15_17034758_17035049 142 chr15 17034893 ENSCAFT00000006827 ENSCAFP00000006320 R 117 P U benign N +Contig131_chr18_20356930_20357227 113 chr18 20357041 ENSCAFT00000006891 ENSCAFP00000006378 V 55 L 610021 benign N +Contig117_chr22_5859195_5860740 654 chr22 5859850 ENSCAFT00000007130 ENSCAFP00000006603 S 139 N 485445 benign cfa04020=Calcium signaling pathway.cfa04080=Neuroactive ligand-receptor interaction +Contig91_chr17_23506302_23507213 
322 chr17 23506624 ENSCAFT00000007145 ENSCAFP00000006614 V 1644 I 607961 benign N +Contig3_chr21_16586556_16586852 105 chr21 16586661 ENSCAFT00000007244 ENSCAFP00000006709 C 33 Y 476781 possibly damaging N +Contig62_chr2_22645987_22646907 357 chr2 22646352 ENSCAFT00000007375 ENSCAFP00000006833 V 657 F 403767 probably damaging cfa04977=Vitamin digestion and absorption +Contig52_chr15_18032498_18034281 880 chr15 18033373 ENSCAFT00000007440 ENSCAFP00000006895 P 227 A 482516 benign N +Contig131_chr23_6679385_6679850 198 chr23 6679592 ENSCAFT00000007467 ENSCAFP00000006915 R 136 G 485576 possibly damaging N +Contig157_chr22_10584088_10586765 232 chr22 10584326 ENSCAFT00000007484 ENSCAFP00000006926 M 610 T 609336 benign N +Contig164_chr2_24336024_24340161 2420 chr2 24338436 ENSCAFT00000007527 ENSCAFP00000006969 S 824 C 607108 probably damaging N +Contig109_chr2_24557417_24558710 808 chr2 24558229 ENSCAFT00000007553 ENSCAFP00000006994 L 606 V 487123 benign cfa03450=Non-homologous end-joining.cfa05340=Primary immunodeficiency +Contig194_chr15_18573761_18574204 142 chr15 18573904 ENSCAFT00000007697 ENSCAFP00000007130 V 381 I 475382 benign N +Contig133_chr23_9924894_9925887 125 chr23 9925016 ENSCAFT00000007703 ENSCAFP00000007136 P 355 S 477019 benign cfa03430=Mismatch repair.cfa03460=Fanconi anemia pathway.cfa05200=Pathways in cancer.cfa05210=Colorectal cancer.cfa05213=Endometrial cancer +Contig31_chr23_10199273_10203629 4073 chr23 10203350 ENSCAFT00000007747 ENSCAFP00000007179 A 1844 V U benign N +Contig21_chr23_10308212_10309269 513 chr23 10308732 ENSCAFT00000007774 ENSCAFP00000007206 K 72 R 477021 benign cfa04510=Focal adhesion.cfa04512=ECM-receptor interaction.cfa04514=Cell adhesion molecules (CAMs).cfa04810=Regulation of actin cytoskeleton.cfa05410=Hypertrophic cardiomyopathy (HCM).cfa05412=Arrhythmogenic right ventricular cardiomyopathy (ARVC).cfa05414=Dilated cardiomyopathy +Contig211_chr1_114924893_114925515 171 chr1 114925067 ENSCAFT00000007776 ENSCAFP00000007208 P 
1988 A U benign N +Contig35_chr2_27160577_27161526 804 chr2 27161367 ENSCAFT00000007779 ENSCAFP00000007211 G 473 R 478007.478008 probably damaging cfa03060=Protein export.cfa04141=Protein processing in endoplasmic reticulum.cfa04145=Phagosome +Contig79_chr17_24285444_24286769 1263 chr17 24286694 ENSCAFT00000007859 ENSCAFP00000007285 S 209 T 483010 benign N +Contig74_chr23_10871047_10871362 70 chr23 10871116 ENSCAFT00000007951 ENSCAFP00000007365 I 474 V U benign N +Contig34_chr16_18928689_18932806 3409 chr16 18932072 ENSCAFT00000007959 ENSCAFP00000007370 A 3754 S 482810.611087 benign cfa00310=Lysine degradation +Contig52_chr21_24452521_24454405 725 chr21 24453245 ENSCAFT00000008012 ENSCAFP00000007418 M 289 T 485173 possibly damaging N +Contig261_chr1_115563599_115564561 560 chr1 115564156 ENSCAFT00000008063 ENSCAFP00000007465 A 63 T 484489 possibly damaging N +Contig62_chr19_41037398_41039465 159 chr19 41037564 ENSCAFT00000008142 ENSCAFP00000007541 C 744 Y 476128 possibly damaging N +Contig84_chr1_115960693_115962811 1467 chr1 115962120 ENSCAFT00000008198 ENSCAFP00000007593 W 61 R 612489 benign N +Contig135_chr23_14160194_14160717 270 chr23 14160468 ENSCAFT00000008413 ENSCAFP00000007796 V 298 I U benign N +Contig41_chr17_26203621_26205196 1407 chr17 26205028 ENSCAFT00000008540 ENSCAFP00000007913 H 172 R 483021 benign N +Contig260_chr1_116076701_116078120 746 chr1 116077446 ENSCAFT00000008586 ENSCAFP00000007956 T 2486 I 484499 benign N +Contig19_chr23_14811332_14815323 1987 chr23 14813327 ENSCAFT00000008588 ENSCAFP00000007958 S 690 L U unknown N +Contig180_chr2_35061773_35062172 166 chr2 35061941 ENSCAFT00000008673 ENSCAFP00000008039 T 920 M 478018 probably damaging N +Contig106_chr21_26153874_26154496 107 chr21 26153984 ENSCAFT00000008678 ENSCAFP00000008044 A 458 T 485188 benign N +Contig3_chr19_45625337_45630123 2563 chr19 45627887 ENSCAFT00000008728 ENSCAFP00000008094 V 1264 I U benign N +Contig51_chr22_48760401_48761638 636 chr22 48761047 ENSCAFT00000008769 
ENSCAFP00000008132 R 1071 K 485523 benign cfa02010=ABC transporters.cfa04976=Bile secretion +Contig10_chr15_21173640_21174011 212 chr15 21173839 ENSCAFT00000008831 ENSCAFP00000008192 V 191 I 475398 benign N +Contig6_chr24_14680423_14681438 782 chr24 14681208 ENSCAFT00000009074 ENSCAFP00000008417 H 562 R 485769 possibly damaging cfa04330=Notch signaling pathway +Contig60_chr9_4528464_4529207 262 chr9 4528727 ENSCAFT00000009114 ENSCAFP00000008453 C 24 F 483354 possibly damaging N +Contig54_chr15_29510545_29512205 400 chr15 29510955 ENSCAFT00000009614 ENSCAFP00000008928 H 190 R 475416 benign N +Contig46_chr25_5067588_5068089 39 chr25 5067627 ENSCAFT00000009698 ENSCAFP00000009003 S 17 N 486001 benign N +Contig126_chr25_5114359_5115799 643 chr25 5114996 ENSCAFT00000009710 ENSCAFP00000009013 R 1952 C 486002 possibly damaging N +Contig41_chr26_3455305_3455893 329 chr26 3455620 ENSCAFT00000010094 ENSCAFP00000009363 S 909 A 486223 benign cfa00230=Purine metabolism.cfa00240=Pyrimidine metabolism.cfa01100=Metabolic pathways.cfa03030=DNA replication.cfa03410=Base excision repair.cfa03420=Nucleotide excision repair.cfa05166=HTLV-I infection +Contig55_chr26_3463883_3465235 1074 chr26 3464998 ENSCAFT00000010094 ENSCAFP00000009363 R 1273 S 486223 benign cfa00230=Purine metabolism.cfa00240=Pyrimidine metabolism.cfa01100=Metabolic pathways.cfa03030=DNA replication.cfa03410=Base excision repair.cfa03420=Nucleotide excision repair.cfa05166=HTLV-I infection +Contig63_chr26_3467460_3468420 195 chr26 3467661 ENSCAFT00000010094 ENSCAFP00000009363 E 1542 Q 486223 benign cfa00230=Purine metabolism.cfa00240=Pyrimidine metabolism.cfa01100=Metabolic pathways.cfa03030=DNA replication.cfa03410=Base excision repair.cfa03420=Nucleotide excision repair.cfa05166=HTLV-I infection +Contig13_chr16_32259141_32259752 344 chr16 32259472 ENSCAFT00000010141 ENSCAFP00000009407 I 326 T 482857 benign cfa04360=Axon guidance +Contig59_chr21_32994329_32995926 1195 chr21 32995538 ENSCAFT00000010439 
ENSCAFP00000009680 H 230 R 610992 benign N +Contig39_chr20_24938452_24941620 1292 chr20 24939734 ENSCAFT00000010496 ENSCAFP00000009730 S 28 P 415126 benign cfa04380=Osteoclast differentiation.cfa04916=Melanogenesis.cfa05200=Pathways in cancer.cfa05218=Melanoma +Contig2_chr18_28546360_28546760 277 chr18 28546640 ENSCAFT00000010516 ENSCAFP00000009748 P 471 S U benign N +Contig23_chr20_25560598_25562858 928 chr20 25561520 ENSCAFT00000010531 ENSCAFP00000009762 T 749 I 484693 benign N +Contig209_chr18_28672330_28672791 376 chr18 28672689 ENSCAFT00000010559 ENSCAFP00000009790 A 33 D 483405 unknown N +Contig261_chr18_28694652_28696968 1808 chr18 28696427 ENSCAFT00000010559 ENSCAFP00000009790 P 1443 L 483405 possibly damaging N +Contig30_chr25_12008255_12009009 151 chr25 12008417 ENSCAFT00000010593 ENSCAFP00000009822 Q 151 H U benign N +Contig46_chr29_3065854_3067420 1265 chr29 3067078 ENSCAFT00000010616 ENSCAFP00000009842 V 3253 A 474176 benign cfa03450=Non-homologous end-joining.cfa04110=Cell cycle +Contig59_chr28_3755477_3757019 935 chr28 3756419 ENSCAFT00000010630 ENSCAFP00000009853 R 923 Q 486770 possibly damaging N +Contig90_chr29_6393993_6395503 951 chr29 6394948 ENSCAFT00000010829 ENSCAFP00000010033 Y 257 C 486944 benign N +Contig42_chr16_39015800_39016389 319 chr16 39016119 ENSCAFT00000010865 ENSCAFP00000010068 D 71 N U possibly damaging N +Contig95_chr21_34533214_34535079 1133 chr21 34534321 ENSCAFT00000010931 ENSCAFP00000010131 E 118 G 485368 benign N +Contig82_chr21_34524815_34525170 247 chr21 34525072 ENSCAFT00000010931 ENSCAFP00000010131 Q 499 R 485368 benign N +Contig32_chr24_22727492_22727986 147 chr24 22727648 ENSCAFT00000010977 ENSCAFP00000010173 P 278 L U possibly damaging N +Contig45_chr16_42405571_42406148 269 chr16 42405837 ENSCAFT00000010988 ENSCAFP00000010184 H 406 R 482891 benign cfa04145=Phagosome +Contig66_chr15_43321121_43321872 642 chr15 43321764 ENSCAFT00000011187 ENSCAFP00000010364 F 543 L 475441 benign N +Contig184_chr27_5103641_5104991 275 
chr27 5103979 ENSCAFT00000011380 ENSCAFP00000010541 V 864 A U benign N +Contig88_chr17_39320200_39320765 204 chr17 39320404 ENSCAFT00000011397 ENSCAFP00000010558 S 1911 N 475750 benign cfa04110=Cell cycle.cfa04114=Oocyte meiosis.cfa04120=Ubiquitin mediated proteolysis.cfa04914=Progesterone-mediated oocyte maturation.cfa05166=HTLV-I infection +Contig8_chr16_47195242_47195504 193 chr16 47195429 ENSCAFT00000011721 ENSCAFP00000010862 S 4369 W 475621 benign N +Contig84_chr27_5882441_5882771 145 chr27 5882579 ENSCAFT00000011730 ENSCAFP00000010871 C 289 S 486534 benign N +Contig42_chr24_25316755_25317362 320 chr24 25317091 ENSCAFT00000011771 ENSCAFP00000010910 G 22 S 477193 benign N +Contig45_chr24_25318544_25319490 734 chr24 25319299 ENSCAFT00000011771 ENSCAFP00000010910 V 187 A 477193 benign N +Contig31_chr24_25434125_25435133 853 chr24 25434975 ENSCAFT00000011789 ENSCAFP00000010928 S 91 I 609978 benign N +Contig20_chr3_10579133_10580085 600 chr3 10579729 ENSCAFT00000011968 ENSCAFP00000011099 K 165 E 488881 benign cfa00760=Nicotinate and nicotinamide metabolism.cfa04146=Peroxisome +Contig45_chr2_54585564_54588038 1047 chr2 54586611 ENSCAFT00000012081 ENSCAFP00000011198 T 969 M 478082 benign cfa04621=NOD-like receptor signaling pathway +Contig156_chr1_122375741_122376035 168 chr1 122375904 ENSCAFT00000012133 ENSCAFP00000011248 R 628 K 611998 benign N +Contig153_chr1_124036982_124040108 1588 chr1 124038585 ENSCAFT00000012159 ENSCAFP00000011272 A 887 T 484609 benign N +Contig32_chr24_26900375_26900913 394 chr24 26900761 ENSCAFT00000012254 ENSCAFP00000011358 H 51 Y U benign N +Contig103_chr16_48829082_48829675 123 chr16 48829205 ENSCAFT00000012381 ENSCAFP00000011471 E 369 G 475632 possibly damaging N +Contig25_chr18_41490135_41493501 534 chr18 41490665 ENSCAFT00000012414 ENSCAFP00000011503 R 703 C 483489 probably damaging cfa04520=Adherens junction.cfa04670=Leukocyte transendothelial migration +Contig69_chr16_49314879_49317228 1810 chr16 49316689 ENSCAFT00000012456 
ENSCAFP00000011541 P 431 L 475636 probably damaging cfa00565=Ether lipid metabolism +Contig71_chr17_42734055_42736474 2240 chr17 42736298 ENSCAFT00000012478 ENSCAFP00000011561 R 307 Q 483083 benign cfa00830=Retinol metabolism +Contig17_chr17_43378842_43379885 305 chr17 43379148 ENSCAFT00000012676 ENSCAFP00000011740 T 196 M U probably damaging N +Contig195_chr27_7047911_7049009 555 chr27 7048468 ENSCAFT00000012942 ENSCAFP00000011978 R 881 L 477608 benign N +Contig112_chr30_4254316_4256576 1478 chr30 4255785 ENSCAFT00000012974 ENSCAFP00000012007 V 2939 I U benign N +Contig43_chr20_39124486_39124798 114 chr20 39124607 ENSCAFT00000013097 ENSCAFP00000012118 G 325 R 607274 possibly damaging N +Contig96_chr16_55849292_55849592 194 chr16 55849494 ENSCAFT00000013360 ENSCAFP00000012363 A 41 S 482932 benign cfa04060=Cytokine-cytokine receptor interaction.cfa04150=mTOR signaling pathway.cfa04510=Focal adhesion.cfa05200=Pathways in cancer.cfa05211=Renal cell carcinoma.cfa05212=Pancreatic cancer.cfa05219=Bladder cancer +Contig91_chr17_51684551_51689453 4154 chr17 51688687 ENSCAFT00000013395 ENSCAFP00000012395 P 306 L 475784 benign N +Contig192_chr26_12794366_12794712 143 chr26 12794506 ENSCAFT00000014076 ENSCAFP00000013021 V 2478 I 477486 benign N +Contig191_chr31_30109152_30109760 212 chr31 30109363 ENSCAFT00000014113 ENSCAFP00000013055 A 1813 T 487735 probably damaging N +Contig116_chr24_29683980_29684819 101 chr24 29684079 ENSCAFT00000014115 ENSCAFP00000013057 R 836 C 485868 probably damaging N +Contig8_chr32_9413601_9414435 74 chr32 9413675 ENSCAFT00000014257 ENSCAFP00000013183 N 236 K 478452 probably damaging cfa00270=Cysteine and methionine metabolism +Contig90_chr21_43253791_43254774 189 chr21 43253974 ENSCAFT00000014325 ENSCAFP00000013248 I 758 V U benign N +Contig76_chr24_30292767_30294101 552 chr24 30293321 ENSCAFT00000014346 ENSCAFP00000013267 A 349 T U benign N +Contig21_chr25_37121451_37122072 177 chr25 37121616 ENSCAFT00000014616 ENSCAFP00000013518 V 157 L 486118 
benign N +Contig15_chr36_6357141_6362626 5226 chr36 6362346 ENSCAFT00000014702 ENSCAFP00000013598 N 138 K 607626 possibly damaging N +Contig64_chr17_54734453_54734993 109 chr17 54734552 ENSCAFT00000014707 ENSCAFP00000013603 S 302 L 483124 benign N +Contig91_chr18_46134014_46136042 330 chr18 46134347 ENSCAFT00000014736 ENSCAFP00000013630 A 214 S 483635 benign cfa00561=Glycerolipid metabolism.cfa00564=Glycerophospholipid metabolism.cfa01100=Metabolic pathways.cfa04070=Phosphatidylinositol signaling system +Contig111_chr31_31237314_31238628 920 chr31 31238220 ENSCAFT00000014822 ENSCAFP00000013714 S 143 C 478408 benign N +Contig77_chr38_3502296_3503058 349 chr38 3502639 ENSCAFT00000015260 ENSCAFP00000014122 K 666 E 478932 benign N +Contig59_chr38_3998294_3999004 369 chr38 3998672 ENSCAFT00000015347 ENSCAFP00000014201 V 791 I U benign N +Contig123_chr31_34367825_34368648 664 chr31 34368468 ENSCAFT00000015534 ENSCAFP00000014373 H 204 Q U possibly damaging N +Contig43_chr30_11874641_11875130 198 chr30 11874850 ENSCAFT00000015654 ENSCAFP00000014488 R 3422 C U benign N +Contig9_chr20_40741488_40743247 1027 chr20 40742525 ENSCAFT00000015816 ENSCAFP00000014638 M 183 V 484744 benign N +Contig137_chr5_7048977_7051042 863 chr5 7049840 ENSCAFT00000015844 ENSCAFP00000014662 A 311 V 479391 benign N +Contig9_chr28_17675067_17680985 1564 chr28 17676618 ENSCAFT00000015971 ENSCAFP00000014772 R 515 P 477805 unknown N +Contig126_chr30_12286682_12287475 407 chr30 12287101 ENSCAFT00000016062 ENSCAFP00000014854 V 450 I 487517 benign cfa00052=Galactose metabolism.cfa00500=Starch and sucrose metabolism.cfa01100=Metabolic pathways +Contig127_chr30_12287497_12288447 608 chr30 12288095 ENSCAFT00000016062 ENSCAFP00000014854 T 495 M 487517 benign cfa00052=Galactose metabolism.cfa00500=Starch and sucrose metabolism.cfa01100=Metabolic pathways +Contig13_chr38_5058391_5058630 66 chr38 5058458 ENSCAFT00000016099 ENSCAFP00000014887 F 412 L 478943 benign N +Contig169_chr35_19985467_19986000 455 chr35 
19985921 ENSCAFT00000016165 ENSCAFP00000014950 T 175 I 478733 benign N +Contig2_chr35_21794536_21795092 291 chr35 21794865 ENSCAFT00000016208 ENSCAFP00000014992 V 84 A 488238 benign cfa00561=Glycerolipid metabolism.cfa00564=Glycerophospholipid metabolism.cfa01100=Metabolic pathways +Contig141_chr26_19278751_19279229 364 chr26 19279128 ENSCAFT00000016284 ENSCAFP00000015064 N 29 S 404011 benign cfa00564=Glycerophospholipid metabolism.cfa00565=Ether lipid metabolism.cfa00590=Arachidonic acid metabolism.cfa00591=Linoleic acid metabolism.cfa00592=alpha-Linolenic acid metabolism.cfa01100=Metabolic pathways.cfa04010=MAPK signaling pathway.cfa04270=Vascular smooth muscle contraction.cfa04370=VEGF signaling pathway.cfa04664=Fc epsilon RI signaling pathway.cfa04724=Glutamatergic synapse.cfa04730=Long-term depression.cfa04912=GnRH signaling pathway.cfa04972=Pancreatic secretion.cfa04975=Fat digestion and absorption.cfa05145=Toxoplasmosis +Contig179_chr3_40781459_40782026 285 chr3 40781763 ENSCAFT00000016410 ENSCAFP00000015182 D 1174 N 488699 benign N +Contig237_chr21_53631024_53632458 203 chr21 53631227 ENSCAFT00000016459 ENSCAFP00000015227 C 47 W 403799 probably damaging cfa04664=Fc epsilon RI signaling pathway.cfa05310=Asthma +Contig186_chr2_71203100_71204111 202 chr2 71203303 ENSCAFT00000016485 ENSCAFP00000015250 S 188 T 478144 benign cfa00330=Arginine and proline metabolism.cfa01100=Metabolic pathways +Contig1_chr28_18779291_18780149 325 chr28 18779619 ENSCAFT00000016578 ENSCAFP00000015340 N 245 Y U probably damaging N +Contig166_chr4_77425871_77426835 797 chr4 77426667 ENSCAFT00000016670 ENSCAFP00000015429 D 115 G 479370 benign cfa00970=Aminoacyl-tRNA biosynthesis +Contig35_chr24_36806524_36807086 367 chr24 36806891 ENSCAFT00000016727 ENSCAFP00000015478 F 345 L 485910 benign N +Contig45_chr37_8610877_8611425 194 chr37 8611078 ENSCAFT00000016761 ENSCAFP00000015511 D 2849 N 488452 possibly damaging N +Contig39_chr28_19446540_19447838 1068 chr28 19447566 ENSCAFT00000016791 
ENSCAFP00000015537 A 1596 E U benign N +Contig161_chr18_51013230_51015381 1494 chr18 51014735 ENSCAFT00000016827 ENSCAFP00000015571 L 977 V 475999 benign N +Contig25_chr28_19619108_19621267 1728 chr28 19620832 ENSCAFT00000016848 ENSCAFP00000034237 I 108 V 609723 benign N +Contig33_chr20_42063173_42064259 623 chr20 42063789 ENSCAFT00000017070 ENSCAFP00000015794 V 179 M U probably damaging N +Contig39_chr38_14681397_14682234 384 chr38 14681781 ENSCAFT00000017072 ENSCAFP00000015796 H 282 N 488593 unknown N +Contig6_chr32_27303975_27304541 425 chr32 27304407 ENSCAFT00000017178 ENSCAFP00000015896 S 354 T 610098 benign N +Contig173_chr38_17709765_17711029 179 chr38 17709941 ENSCAFT00000017240 ENSCAFP00000015955 G 464 R U benign N +Contig52_chr32_27452924_27453332 91 chr32 27452999 ENSCAFT00000017249 ENSCAFP00000015964 A 22 S U benign N +Contig319_chr34_14684259_14684663 353 chr34 14684613 ENSCAFT00000017314 ENSCAFP00000016025 R 5 Q 478632 benign N +Contig32_chr2_72269353_72269814 349 chr2 72269708 ENSCAFT00000017327 ENSCAFP00000016037 P 853 L 487317 possibly damaging N +Contig206_chr9_18720001_18720613 155 chr9 18720160 ENSCAFT00000017373 ENSCAFP00000016082 D 1621 E 480456 benign cfa02010=ABC transporters +Contig35_chr37_10562149_10562621 74 chr37 10562222 ENSCAFT00000017444 ENSCAFP00000016153 I 975 V 478858 benign cfa04727=GABAergic synapse +Contig1_chr30_12655575_12656916 370 chr30 12655947 ENSCAFT00000017777 ENSCAFP00000016457 L 639 M 608886 probably damaging N +Contig63_chr27_23738716_23739879 1131 chr27 23739850 ENSCAFT00000017892 ENSCAFP00000016566 P 642 L 486627 benign N +Contig44_chr28_28123120_28124627 1348 chr28 28124495 ENSCAFT00000017967 ENSCAFP00000016639 V 261 A 477827 benign N +Contig23_chrX_6416128_6417014 455 chrX 6416585 ENSCAFT00000018017 ENSCAFP00000016684 H 111 R 491733 possibly damaging N +Contig31_chr7_8282189_8286932 3631 chr7 8285875 ENSCAFT00000018057 ENSCAFP00000016724 L 655 P 490260 benign N +Contig318_chr6_8706066_8706350 76 chr6 8706142 
ENSCAFT00000018106 ENSCAFP00000016769 K 318 N 607700 possibly damaging cfa04062=Chemokine signaling pathway.cfa04145=Phagosome.cfa04380=Osteoclast differentiation.cfa04666=Fc gamma R-mediated phagocytosis.cfa04670=Leukocyte transendothelial migration.cfa05140=Leishmaniasis +Contig36_chr32_33046881_33048369 1118 chr32 33047990 ENSCAFT00000018307 ENSCAFP00000016954 E 555 A 403657 benign cfa04010=MAPK signaling pathway.cfa04012=ErbB signaling pathway.cfa04060=Cytokine-cytokine receptor interaction.cfa04144=Endocytosis.cfa04510=Focal adhesion.cfa04540=Gap junction.cfa04810=Regulation of actin cytoskeleton.cfa05160=Hepatitis C.cfa05200=Pathways in cancer.cfa05212=Pancreatic cancer.cfa05213=Endometrial cancer.cfa05214=Glioma.cfa05215=Prostate cancer.cfa05218=Melanoma.cfa05219=Bladder cancer.cfa05223=Non-small cell lung cancer +Contig32_chr5_14476595_14477214 347 chr5 14476948 ENSCAFT00000018571 ENSCAFP00000017201 V 209 A 610296 benign N +Contig88_chr34_19031138_19031937 343 chr34 19031477 ENSCAFT00000018684 ENSCAFP00000017309 K 670 R 478645 benign cfa00280=Valine, leucine and isoleucine degradation.cfa01100=Metabolic pathways +Contig188_chr25_47927372_47928085 557 chr25 47927941 ENSCAFT00000018758 ENSCAFP00000017379 K 228 R 486167 benign cfa00561=Glycerolipid metabolism.cfa00564=Glycerophospholipid metabolism.cfa01100=Metabolic pathways.cfa04070=Phosphatidylinositol signaling system +Contig36_chr37_12924359_12924740 86 chr37 12924449 ENSCAFT00000018786 ENSCAFP00000017406 D 187 Y 608849 probably damaging cfa00280=Valine, leucine and isoleucine degradation.cfa00350=Tyrosine metabolism.cfa00380=Tryptophan metabolism.cfa00750=Vitamin B6 metabolism.cfa00760=Nicotinate and nicotinamide metabolism.cfa00982=Drug metabolism - cytochrome P450.cfa01100=Metabolic pathways +Contig3_chr34_19471626_19472377 337 chr34 19471956 ENSCAFT00000018788 ENSCAFP00000017408 R 239 Q 488096 possibly damaging N +Contig80_chr4_11155760_11156827 952 chr4 11156735 ENSCAFT00000018796 ENSCAFP00000017416 
S 661 N 479204 benign cfa00564=Glycerophospholipid metabolism.cfa04146=Peroxisome +Contig56_chr8_7093747_7095987 683 chr8 7094428 ENSCAFT00000018813 ENSCAFP00000017431 P 126 R 490620 unknown N +Contig82_chr8_7111986_7114065 1351 chr8 7113329 ENSCAFT00000018871 ENSCAFP00000017488 R 608 H 480255 probably damaging cfa00010=Glycolysis / Gluconeogenesis.cfa00020=Citrate cycle (TCA cycle).cfa00620=Pyruvate metabolism.cfa01100=Metabolic pathways.cfa03320=PPAR signaling pathway.cfa04910=Insulin signaling pathway.cfa04920=Adipocytokine signaling pathway.cfa04964=Proximal tubule bicarbonate reclamation +Contig172_chr33_28585454_28586084 228 chr33 28585687 ENSCAFT00000018884 ENSCAFP00000017500 R 36 K 478584 benign N +Contig113_chr5_16682954_16684491 688 chr5 16683641 ENSCAFT00000018997 ENSCAFP00000017606 F 41 L 489360 benign N +Contig36_chr28_31449413_31452160 2111 chr28 31451506 ENSCAFT00000019041 ENSCAFP00000017650 P 252 H 477834 benign cfa04144=Endocytosis +Contig80_chr3_55628026_55628800 392 chr3 55628403 ENSCAFT00000019070 ENSCAFP00000017677 R 805 K 403913 benign cfa00480=Glutathione metabolism.cfa01100=Metabolic pathways.cfa04614=Renin-angiotensin system.cfa04640=Hematopoietic cell lineage +Contig99_chr7_11816365_11819255 806 chr7 11817201 ENSCAFT00000019101 ENSCAFP00000017707 C 305 G 490276 benign N +Contig114_chr4_12744102_12745318 148 chr4 12744256 ENSCAFT00000019279 ENSCAFP00000017880 I 700 V U benign N +Contig82_chr7_13056757_13058281 974 chr7 13057742 ENSCAFT00000019316 ENSCAFP00000017915 S 283 N 609933 benign cfa00564=Glycerophospholipid metabolism +Contig280_chr25_51367477_51367885 70 chr25 51367542 ENSCAFT00000019610 ENSCAFP00000018191 S 97 L U benign N +Contig35_chr20_43508791_43509352 460 chr20 43509254 ENSCAFT00000019627 ENSCAFP00000018204 V 77 A 608455 benign cfa00190=Oxidative phosphorylation.cfa01100=Metabolic pathways.cfa04260=Cardiac muscle contraction.cfa05010=Alzheimer's disease.cfa05012=Parkinson's disease.cfa05016=Huntington's disease 
+Contig36_chr20_43509362_43510980 1484 chr20 43510860 ENSCAFT00000019627 ENSCAFP00000018204 D 181 N 608455 benign cfa00190=Oxidative phosphorylation.cfa01100=Metabolic pathways.cfa04260=Cardiac muscle contraction.cfa05010=Alzheimer's disease.cfa05012=Parkinson's disease.cfa05016=Huntington's disease +Contig59_chr25_51807653_51809044 1064 chr25 51808739 ENSCAFT00000019760 ENSCAFP00000018330 R 235 K U benign N +Contig96_chr36_17712997_17714068 556 chr36 17713559 ENSCAFT00000019807 ENSCAFP00000018374 T 423 I 478789 benign N +Contig163_chr28_34927368_34929275 1128 chr28 34928486 ENSCAFT00000019866 ENSCAFP00000018425 A 2659 T 477850 benign N +Contig74_chr33_31230250_31230874 246 chr33 31230493 ENSCAFT00000019938 ENSCAFP00000018492 G 113 S 488016 probably damaging N +Contig130_chr7_15553315_15558308 3186 chr7 15556497 ENSCAFT00000020009 ENSCAFP00000018561 K 1513 N U benign N +Contig160_chr2_76816412_76817166 354 chr2 76816779 ENSCAFT00000020143 ENSCAFP00000018683 I 190 V 478173 benign N +Contig219_chr33_31871568_31871771 81 chr33 31871646 ENSCAFT00000020195 ENSCAFP00000018733 N 346 H U probably damaging N +Contig254_chr24_50001599_50001992 151 chr24 50001767 ENSCAFT00000020266 ENSCAFP00000018803 R 239 Q U benign N +Contig40_chr37_15283702_15285945 1908 chr37 15285621 ENSCAFT00000020408 ENSCAFP00000018937 A 809 G U unknown N +Contig59_chr20_43702094_43703358 450 chr20 43702540 ENSCAFT00000020438 ENSCAFP00000018965 S 217 A U benign N +Contig75_chr3_57465650_57466327 377 chr3 57466017 ENSCAFT00000020863 ENSCAFP00000019371 L 205 F 609716 probably damaging N +Contig155_chr2_79195879_79199423 2014 chr2 79197892 ENSCAFT00000021154 ENSCAFP00000019645 G 549 S U benign N +Contig155_chr2_79195879_79199423 3136 chr2 79199014 ENSCAFT00000021154 ENSCAFP00000019645 R 923 C U probably damaging N +Contig59_chr5_19784971_19787384 1310 chr5 19786293 ENSCAFT00000021222 ENSCAFP00000019707 V 171 I 479428.489393 benign cfa03320=PPAR signaling pathway +Contig41_chr30_14304605_14305465 206 chr30 
14304816 ENSCAFT00000021612 ENSCAFP00000020069 A 157 G U benign N +Contig47_chr20_45043804_45044476 317 chr20 45044117 ENSCAFT00000021659 ENSCAFP00000020114 V 281 I 609323 benign N +Contig46_chr4_22849549_22849829 123 chr4 22849673 ENSCAFT00000021752 ENSCAFP00000020204 V 646 M U probably damaging N +Contig141_chr7_22360980_22361690 242 chr7 22361233 ENSCAFT00000021777 ENSCAFP00000020227 K 1862 R U unknown N +Contig59_chr30_14758622_14760653 1186 chr30 14759817 ENSCAFT00000021792 ENSCAFP00000020241 S 284 R 609256 benign N +Contig57_chr27_39696388_39698349 1026 chr27 39697428 ENSCAFT00000021846 ENSCAFP00000020293 Q 588 R 477699 benign cfa04610=Complement and coagulation cascades +Contig83_chr27_40151814_40153141 738 chr27 40152551 ENSCAFT00000022064 ENSCAFP00000020490 S 191 R 477702 benign N +Contig105_chr6_11901733_11904968 406 chr6 11902145 ENSCAFT00000022289 ENSCAFP00000020701 Y 55 H 479732 probably damaging cfa04621=NOD-like receptor signaling pathway +Contig43_chr36_25298890_25299602 235 chr36 25299132 ENSCAFT00000022319 ENSCAFP00000020728 E 11731 K 610299.610339 unknown N +Contig3_chr36_25193150_25202641 2802 chr36 25195983 ENSCAFT00000022319 ENSCAFP00000020728 I 30137 V 610299.610339 benign N +Contig585_chr3_61201332_61201904 139 chr3 61201468 ENSCAFT00000022529 ENSCAFP00000020918 L 97 V 479067 benign cfa00071=Fatty acid metabolism.cfa03320=PPAR signaling pathway.cfa04920=Adipocytokine signaling pathway +Contig1_chr20_46714929_46715937 434 chr20 46715327 ENSCAFT00000022571 ENSCAFP00000020958 A 18 P 484804 unknown N +Contig7_chr8_29376780_29378260 158 chr8 29376937 ENSCAFT00000022576 ENSCAFP00000020962 T 852 A 490678 benign N +Contig74_chr8_29656170_29657212 595 chr8 29656776 ENSCAFT00000022697 ENSCAFP00000021080 E 974 K 490682 possibly damaging cfa04010=MAPK signaling pathway.cfa04012=ErbB signaling pathway.cfa04062=Chemokine signaling pathway.cfa04320=Dorso-ventral axis formation.cfa04510=Focal adhesion.cfa04540=Gap junction.cfa04630=Jak-STAT signaling 
pathway.cfa04650=Natural killer cell mediated cytotoxicity.cfa04660=T cell receptor signaling pathway.cfa04662=B cell receptor signaling pathway.cfa04664=Fc epsilon RI signaling pathway.cfa04722=Neurotrophin signaling pathway.cfa04810=Regulation of actin cytoskeleton.cfa04910=Insulin signaling pathway.cfa04912=GnRH signaling pathway.cfa05160=Hepatitis C.cfa05200=Pathways in cancer.cfa05211=Renal cell carcinoma.cfa05213=Endometrial cancer.cfa05214=Glioma.cfa05215=Prostate cancer.cfa05220=Chronic myeloid leukemia.cfa05221=Acute myeloid leukemia.cfa05223=Non-small cell lung cancer +Contig45_chr4_25273541_25274402 608 chr4 25274121 ENSCAFT00000022760 ENSCAFP00000021140 S 30 F 479239 probably damaging cfa04360=Axon guidance +Contig96_chr37_26111249_26111450 108 chr37 26111364 ENSCAFT00000022884 ENSCAFP00000021256 K 260 R 478902 benign cfa03450=Non-homologous end-joining +Contig196_chr3_62434637_62435063 203 chr3 62434823 ENSCAFT00000022915 ENSCAFP00000021284 L 174 P 488785 benign N +Contig15_chr6_12238116_12239737 1287 chr6 12239420 ENSCAFT00000022961 ENSCAFP00000021328 E 165 K 479735 benign N +Contig175_chr5_27267391_27267870 57 chr5 27267451 ENSCAFT00000023032 ENSCAFP00000021395 N 1094 S 479450 benign cfa04110=Cell cycle.cfa04115=p53 signaling pathway.cfa04210=Apoptosis.cfa05166=HTLV-I infection +Contig110_chr20_47192181_47193618 93 chr20 47192262 ENSCAFT00000023054 ENSCAFP00000021407 A 308 P 484814 probably damaging N +Contig9_chr4_26730063_26730585 245 chr4 26730316 ENSCAFT00000023087 ENSCAFP00000021437 E 153 D 489044 benign N +Contig1_chr34_35420831_35421658 73 chr34 35420908 ENSCAFT00000023111 ENSCAFP00000021457 V 251 I 488144 benign N +Contig199_chr2_79696091_79697603 751 chr2 79696840 ENSCAFT00000023253 ENSCAFP00000021593 D 54 A U possibly damaging N +Contig146_chrX_38946913_38947473 307 chrX 38947225 ENSCAFT00000023268 ENSCAFP00000021608 L 160 V 612457 possibly damaging N +Contig63_chr9_23532151_23533554 1297 chr9 23533421 ENSCAFT00000023438 ENSCAFP00000021767 
Q 279 R 490958 benign N +Contig89_chr5_32060784_32061151 293 chr5 32061079 ENSCAFT00000023913 ENSCAFP00000022199 W 106 * 489430 probably damaging N +Contig15_chr3_65640843_65642155 1100 chr3 65641942 ENSCAFT00000023933 ENSCAFP00000022218 V 383 A 479080 benign N +Contig49_chr26_33571748_33572620 689 chr26 33572452 ENSCAFT00000024062 ENSCAFP00000022339 R 478 W 486440 benign N +Contig96_chr20_48055741_48057197 524 chr20 48056259 ENSCAFT00000024100 ENSCAFP00000022374 R 172 Q U benign N +Contig104_chr20_48062263_48062546 210 chr20 48062492 ENSCAFT00000024100 ENSCAFP00000022374 V 775 G U probably damaging N +Contig33_chr37_28794567_28796956 2144 chr37 28796718 ENSCAFT00000024137 ENSCAFP00000022408 E 279 Q 488536 benign N +Contig24_chr7_32005266_32005660 212 chr7 32005479 ENSCAFT00000024154 ENSCAFP00000022424 T 92 M U probably damaging N +Contig174_chr18_56896461_56897594 274 chr18 56896734 ENSCAFT00000024637 ENSCAFP00000022858 V 157 L 483779 benign cfa04130=SNARE interactions in vesicular transport +Contig55_chr20_48811642_48812027 299 chr20 48811941 ENSCAFT00000024761 ENSCAFP00000022970 H 993 R 476678 benign N +Contig220_chr18_56925351_56927006 920 chr18 56926246 ENSCAFT00000024787 ENSCAFP00000022995 P 420 Q 476051 possibly damaging cfa03022=Basal transcription factors.cfa05168=Herpes simplex infection +Contig12_chr8_39044824_39045409 359 chr8 39045181 ENSCAFT00000024804 ENSCAFP00000023011 I 280 T 612894 possibly damaging N +Contig23_chr3_72567678_72570858 1313 chr3 72568976 ENSCAFT00000024846 ENSCAFP00000023051 L 298 P 488826 benign N +Contig190_chr7_35896301_35896811 232 chr7 35896528 ENSCAFT00000024892 ENSCAFP00000023095 R 3 L 480092 unknown cfa00020=Citrate cycle (TCA cycle).cfa01100=Metabolic pathways.cfa05200=Pathways in cancer.cfa05211=Renal cell carcinoma +Contig119_chr20_49114009_49114654 266 chr20 49114270 ENSCAFT00000024934 ENSCAFP00000023135 F 339 L 484849 benign N +Contig47_chr8_41487304_41487682 210 chr8 41487515 ENSCAFT00000025088 ENSCAFP00000023286 S 
1743 L 490729 possibly damaging N +Contig67_chr2_84099157_84100880 345 chr2 84099493 ENSCAFT00000025109 ENSCAFP00000023307 I 60 L U benign N +Contig33_chr20_49727730_49730958 2192 chr20 49729935 ENSCAFT00000025308 ENSCAFP00000023495 T 448 R U probably damaging N +Contig33_chr20_49727730_49730958 2907 chr20 49730606 ENSCAFT00000025308 ENSCAFP00000023495 W 493 L U benign N +Contig93_chr8_42181027_42183022 694 chr8 42181716 ENSCAFT00000025462 ENSCAFP00000023641 L 782 P U benign N +Contig131_chrX_44937490_44940040 950 chrX 44938456 ENSCAFT00000025663 ENSCAFP00000023835 V 120 M 491894 benign N +Contig100_chrX_44915404_44918232 1832 chrX 44917224 ENSCAFT00000025663 ENSCAFP00000023835 R 1212 Q 491894 benign N +Contig100_chrX_44915404_44918232 920 chrX 44916331 ENSCAFT00000025663 ENSCAFP00000023835 L 1377 V 491894 benign N +Contig123_chr9_26132942_26133532 310 chr9 26133253 ENSCAFT00000025948 ENSCAFP00000024090 I 232 V 491022 benign N +Contig34_chr6_17772839_17773548 489 chr6 17773329 ENSCAFT00000026008 ENSCAFP00000024146 E 377 Q U benign N +Contig382_chr7_43383655_43383893 190 chr7 43383854 ENSCAFT00000026053 ENSCAFP00000024188 R 123 C U possibly damaging N +Contig163_chr2_87404548_87404792 132 chr2 87404673 ENSCAFT00000026251 ENSCAFP00000024378 D 239 N U benign N +Contig15_chr3_91850893_91851323 75 chr3 91850967 ENSCAFT00000026343 ENSCAFP00000024465 S 722 N 595148 benign cfa04360=Axon guidance +Contig141_chr7_44385686_44386047 166 chr7 44385857 ENSCAFT00000026393 ENSCAFP00000024510 L 166 P 490412 benign cfa04810=Regulation of actin cytoskeleton +Contig161_chr2_87840986_87841705 540 chr2 87841516 ENSCAFT00000026485 ENSCAFP00000024598 F 678 C 478233 probably damaging cfa03018=RNA degradation +Contig177_chr9_27497479_27498192 354 chr9 27497831 ENSCAFT00000026613 ENSCAFP00000024719 A 175 V 491046 possibly damaging N +Contig162_chr6_20156115_20157725 81 chr6 20156197 ENSCAFT00000026687 ENSCAFP00000024793 T 702 M 489923.489924.607168 benign N +Contig8_chr9_28287278_28288276 
469 chr9 28287755 ENSCAFT00000026707 ENSCAFP00000024813 A 75 P 491060 benign N +Contig166_chr7_45276673_45277595 235 chr7 45276916 ENSCAFT00000026881 ENSCAFP00000024984 V 525 I 490428 benign N +Contig16_chr8_51223078_51223662 481 chr8 51223563 ENSCAFT00000026967 ENSCAFP00000025070 R 869 Q 490790 benign N +Contig65_chr9_29792446_29793465 893 chr9 29793341 ENSCAFT00000027073 ENSCAFP00000025173 S 81 A 491082 benign N +Contig175_chr6_30926774_30927470 446 chr6 30927229 ENSCAFT00000027269 ENSCAFP00000025361 S 663 T 403453 benign cfa02010=ABC transporters.cfa04977=Vitamin digestion and absorption +Contig45_chr30_33024389_33025619 471 chr30 33024857 ENSCAFT00000027320 ENSCAFP00000025407 G 986 A 487608 benign N +Contig60_chr20_53087461_53088013 184 chr20 53087649 ENSCAFT00000027519 ENSCAFP00000025591 S 556 L 611163 benign N +Contig98_chr5_37073086_37073674 378 chr5 37073467 ENSCAFT00000027596 ENSCAFP00000025664 V 38 M 479499 probably damaging cfa04130=SNARE interactions in vesicular transport +Contig64_chr9_36235086_36235751 475 chr9 36235563 ENSCAFT00000027673 ENSCAFP00000025737 D 260 E 491111 benign cfa04970=Salivary secretion +Contig72_chr30_35330469_35330831 236 chr30 35330709 ENSCAFT00000027712 ENSCAFP00000025770 G 386 C 478353 probably damaging cfa04510=Focal adhesion.cfa04512=ECM-receptor interaction.cfa04810=Regulation of actin cytoskeleton.cfa05410=Hypertrophic cardiomyopathy (HCM).cfa05412=Arrhythmogenic right ventricular cardiomyopathy (ARVC).cfa05414=Dilated cardiomyopathy +Contig12_chr8_66066327_66066629 89 chr8 66066402 ENSCAFT00000027927 ENSCAFP00000025970 K 158 R 490836 benign N +Contig212_chr8_66173086_66174259 622 chr8 66173712 ENSCAFT00000027950 ENSCAFP00000025993 K 114 Q 480421 benign N +Contig176_chr7_48083671_48084458 311 chr7 48083983 ENSCAFT00000027972 ENSCAFP00000026015 R 128 H 480148 probably damaging N +Contig3_chr4_58820541_58821952 265 chr4 58820806 ENSCAFT00000027979 ENSCAFP00000026022 A 31 T 489166 benign N +Contig24_chr7_48238665_48239174 
383 chr7 48239049 ENSCAFT00000028007 ENSCAFP00000026049 T 227 M 480151 probably damaging N +Contig25_chr6_26340448_26341519 657 chr6 26341104 ENSCAFT00000028115 ENSCAFP00000026155 S 128 L 479811 possibly damaging N +Contig212_chr5_38871122_38871621 302 chr5 38871429 ENSCAFT00000028231 ENSCAFP00000026253 P 2265 L 489507 benign N +Contig147_chr6_27310627_27310983 100 chr6 27310719 ENSCAFT00000028327 ENSCAFP00000026344 V 154 A U benign N +Contig160_chr6_27318582_27318861 67 chr6 27318647 ENSCAFT00000028327 ENSCAFP00000026344 K 325 R U benign N +Contig18_chr4_61023435_61026038 385 chr4 61023825 ENSCAFT00000028363 ENSCAFP00000026377 P 4110 L 479323 benign N +Contig162_chr30_40685605_40687049 343 chr30 40685956 ENSCAFT00000028463 ENSCAFP00000026472 A 416 P 487646 benign N +Contig68_chr20_54017481_54018354 221 chr20 54017705 ENSCAFT00000028500 ENSCAFP00000026509 W 539 R U benign N +Contig50_chr7_59076761_59079381 2353 chr7 59079104 ENSCAFT00000028551 ENSCAFP00000026557 V 1487 I 490492 benign N +Contig51_chr7_59079274_59084588 2611 chr7 59081905 ENSCAFT00000028551 ENSCAFP00000026557 A 575 V 490492 benign N +Contig3_chr20_54855789_54856135 37 chr20 54855833 ENSCAFT00000028813 ENSCAFP00000026796 F 6015 S U unknown N +Contig157_chr5_43472186_43472528 168 chr5 43472353 ENSCAFT00000028826 ENSCAFP00000026807 R 355 Q 489526 benign cfa00010=Glycolysis / Gluconeogenesis.cfa00340=Histidine metabolism.cfa00350=Tyrosine metabolism.cfa00360=Phenylalanine metabolism.cfa00410=beta-Alanine metabolism.cfa00980=Metabolism of xenobiotics by cytochrome P450.cfa00982=Drug metabolism - cytochrome P450.cfa01100=Metabolic pathways +Contig80_chr20_55281094_55281971 129 chr20 55281228 ENSCAFT00000028936 ENSCAFP00000026914 T 931 A U benign N +Contig214_chr8_74493164_74493474 188 chr8 74493346 ENSCAFT00000029054 ENSCAFP00000027017 R 94 C U probably damaging N +Contig259_chr20_55571618_55572503 186 chr20 55571803 ENSCAFT00000029100 ENSCAFP00000027059 K 526 Q 485001 benign N 
+Contig180_chr9_41668066_41668716 357 chr9 41668451 ENSCAFT00000029122 ENSCAFP00000027081 E 990 D 491145 benign cfa03410=Base excision repair +Contig61_chr4_63087183_63089623 491 chr4 63087672 ENSCAFT00000029130 ENSCAFP00000027089 A 20 S U benign N +Contig261_chrX_94412915_94414298 488 chrX 94413396 ENSCAFT00000029188 ENSCAFP00000027142 D 329 E U unknown N +Contig58_chr4_70221679_70223505 1749 chr4 70223432 ENSCAFT00000029501 ENSCAFP00000027423 T 324 S 403721 benign cfa04060=Cytokine-cytokine receptor interaction.cfa04080=Neuroactive ligand-receptor interaction.cfa04630=Jak-STAT signaling pathway +Contig21_chr7_77985141_77986170 827 chr7 77985962 ENSCAFT00000029651 ENSCAFP00000027557 A 855 S 490545 benign N +Contig93_chrX_104176429_104177974 811 chrX 104177246 ENSCAFT00000029709 ENSCAFP00000027610 T 719 M 492128 benign cfa03008=Ribosome biogenesis in eukaryotes +Contig175_chr9_46116277_46118268 1090 chr9 46117366 ENSCAFT00000029722 ENSCAFP00000027622 Q 693 H U benign N +Contig134_chr4_76495667_76496825 860 chr4 76496507 ENSCAFT00000029827 ENSCAFP00000027720 I 113 V 612589 benign cfa00250=Alanine, aspartate and glutamate metabolism.cfa00260=Glycine, serine and threonine metabolism.cfa01100=Metabolic pathways +Contig247_chr6_31967574_31967796 158 chr6 31967732 ENSCAFT00000029875 ENSCAFP00000027765 P 750 T 489999 benign N +Contig6_chr7_81650872_81657348 3786 chr7 81654636 ENSCAFT00000030050 ENSCAFP00000027927 S 501 C 480218 benign N +Contig122_chr5_57147596_57148457 360 chr5 57147964 ENSCAFT00000030140 ENSCAFP00000028007 T 713 I 479558 benign N +Contig83_chr20_58039274_58039724 380 chr20 58039649 ENSCAFT00000030192 ENSCAFP00000028056 E 142 K 611866 benign N +Contig42_chr5_58023274_58024296 585 chr5 58023845 ENSCAFT00000030282 ENSCAFP00000028135 V 415 A 489580 benign N +Contig248_chr20_58217741_58219717 751 chr20 58218495 ENSCAFT00000030285 ENSCAFP00000028138 G 278 S 485038 unknown N +Contig127_chr6_39501489_39501966 83 chr6 39501576 ENSCAFT00000030381 
ENSCAFP00000028228 N 155 S 490020 benign N +Contig123_chr6_39499974_39501056 816 chr6 39500798 ENSCAFT00000030381 ENSCAFP00000028228 A 195 P 490020 benign N +Contig247_chr6_39576694_39577607 493 chr6 39577171 ENSCAFT00000030386 ENSCAFP00000028233 S 745 N 490021 benign N +Contig6_chr9_50725202_50725646 143 chr9 50725344 ENSCAFT00000030726 ENSCAFP00000028560 M 12 T 491218 benign N +Contig221_chr6_41879771_41881379 766 chr6 41880519 ENSCAFT00000030883 ENSCAFP00000028717 A 184 T 606755 benign N +Contig231_chr5_60474911_60475630 279 chr5 60475186 ENSCAFT00000030960 ENSCAFP00000028794 C 505 Y 489618 possibly damaging N +Contig99_chr5_63306202_63308496 2063 chr5 63308224 ENSCAFT00000031146 ENSCAFP00000028978 A 421 V U unknown N +Contig245_chr5_66149146_66149848 349 chr5 66149499 ENSCAFT00000031407 ENSCAFP00000029234 R 207 Q 479601 benign cfa00760=Nicotinate and nicotinamide metabolism.cfa01100=Metabolic pathways +Contig305_chr5_67253589_67254394 375 chr5 67253954 ENSCAFT00000031570 ENSCAFP00000029391 R 203 Q U possibly damaging N +Contig94_chr9_56873843_56875505 1578 chr9 56875408 ENSCAFT00000031743 ENSCAFP00000029555 P 2937 S U benign N +Contig107_chr5_71317862_71318113 71 chr5 71317944 ENSCAFT00000031781 ENSCAFP00000029590 M 281 V U benign N +Contig134_chr9_57426140_57427208 236 chr9 57426380 ENSCAFT00000031798 ENSCAFP00000029606 V 89 I 480698 benign cfa00590=Arachidonic acid metabolism.cfa01100=Metabolic pathways +Contig60_chr12_5631507_5632392 818 chr12 5632313 ENSCAFT00000031814 ENSCAFP00000029621 Y 1697 C 481734 unknown cfa04510=Focal adhesion.cfa04512=ECM-receptor interaction.cfa04974=Protein digestion and absorption.cfa05146=Amoebiasis +Contig132_chr5_73710776_73711271 149 chr5 73710927 ENSCAFT00000031848 ENSCAFP00000029653 T 1323 M 489696 probably damaging N +Contig39_chr9_59278364_59279024 398 chr9 59278757 ENSCAFT00000032068 ENSCAFP00000029863 A 957 T 480718 benign N +Contig177_chr9_61212763_61213621 700 chr9 61213430 ENSCAFT00000032171 ENSCAFP00000029958 D 79 
N U benign N +Contig67_chr6_62507717_62510152 1055 chr6 62508787 ENSCAFT00000032186 ENSCAFP00000029972 I 212 M 479959 probably damaging cfa00380=Tryptophan metabolism.cfa00450=Selenocompound metabolism.cfa01100=Metabolic pathways +Contig66_chr6_64570039_64570630 325 chr6 64570365 ENSCAFT00000032239 ENSCAFP00000030024 A 862 G 479964 benign cfa04740=Olfactory transduction.cfa04972=Pancreatic secretion +Contig22_chr6_64809414_64810661 626 chr6 64810027 ENSCAFT00000032269 ENSCAFP00000030052 Q 559 K 490179 benign N +Contig50_chr5_85052459_85052865 55 chr5 85052515 ENSCAFT00000032431 ENSCAFP00000030201 S 32 G 479688 benign N +Contig25_chr5_85095840_85098495 1627 chr5 85097474 ENSCAFT00000032433 ENSCAFP00000030203 F 681 S U benign N +Contig25_chr5_85480673_85480982 186 chr5 85480860 ENSCAFT00000032493 ENSCAFP00000030260 A 180 T 610026 possibly damaging cfa05010=Alzheimer's disease +Contig19_chr5_24601128_24602241 685 chr5 24601813 ENSCAFT00000035141 ENSCAFP00000030364 T 695 S U benign N +Contig59_chr26_11519273_11520242 659 chr26 11519937 ENSCAFT00000035276 ENSCAFP00000030520 P 160 L 403557 probably damaging cfa03015=mRNA surveillance pathway.cfa04114=Oocyte meiosis.cfa04270=Vascular smooth muscle contraction.cfa04510=Focal adhesion.cfa04720=Long-term potentiation.cfa04728=Dopaminergic synapse.cfa04810=Regulation of actin cytoskeleton.cfa04910=Insulin signaling pathway.cfa05168=Herpes simplex infection +Contig27_chr12_23130802_23131771 353 chr12 23131154 ENSCAFT00000035307 ENSCAFP00000030552 V 565 M 474935 probably damaging N +Contig31_chr1_8052327_8053606 234 chr1 8052570 ENSCAFT00000035442 ENSCAFP00000030703 C 153 S U possibly damaging N +Contig59_chr20_40539078_40540678 1223 chr20 40540302 ENSCAFT00000035532 ENSCAFP00000030804 H 285 R 403502 benign cfa04620=Toll-like receptor signaling pathway.cfa05142=Chagas disease (American trypanosomiasis).cfa05143=African trypanosomiasis.cfa05144=Malaria.cfa05152=Tuberculosis.cfa05162=Measles.cfa05168=Herpes simplex infection 
+Contig152_chr6_25356961_25358151 701 chr6 25357665 ENSCAFT00000035750 ENSCAFP00000031044 P 479 S 608555 benign cfa04142=Lysosome +Contig18_chr9_58576258_58576773 215 chr9 58576474 ENSCAFT00000035914 ENSCAFP00000031224 K 118 E 480706 benign N +Contig8_chr15_38734005_38734403 242 chr15 38734244 ENSCAFT00000035916 ENSCAFP00000031226 A 237 V 611996 possibly damaging N +Contig76_chr3_30625909_30626247 159 chr3 30626069 ENSCAFT00000036198 ENSCAFP00000031549 T 135 S 479171 benign cfa00260=Glycine, serine and threonine metabolism.cfa00270=Cysteine and methionine metabolism.cfa01100=Metabolic pathways +Contig86_chr37_14528768_14530343 873 chr37 14529628 ENSCAFT00000036570 ENSCAFP00000031969 V 738 D 478875.609202 possibly damaging cfa04060=Cytokine-cytokine receptor interaction.cfa04350=TGF-beta signaling pathway +Contig9_chr5_54124181_54125739 1134 chr5 54125291 ENSCAFT00000036640 ENSCAFP00000032043 A 187 T 610286 benign N +Contig107_chr9_8990420_8991676 1178 chr9 8991591 ENSCAFT00000036774 ENSCAFP00000032186 T 55 M 483288 benign N +Contig47_chr12_20319418_20320775 1212 chr12 20320622 ENSCAFT00000036825 ENSCAFP00000032241 K 606 T 474930 benign cfa00280=Valine, leucine and isoleucine degradation.cfa00630=Glyoxylate and dicarboxylate metabolism.cfa00640=Propanoate metabolism.cfa01100=Metabolic pathways +Contig4_chr2_45195542_45196115 233 chr2 45195785 ENSCAFT00000037022 ENSCAFP00000032463 D 833 N 478055 possibly damaging N +Contig8_chr8_77227029_77227651 339 chr8 77227366 ENSCAFT00000037096 ENSCAFP00000032544 T 61 A 490895.612602 benign cfa04020=Calcium signaling pathway.cfa04145=Phagosome.cfa04640=Hematopoietic cell lineage.cfa04650=Natural killer cell mediated cytotoxicity.cfa04662=B cell receptor signaling pathway.cfa04664=Fc epsilon RI signaling pathway.cfa04666=Fc gamma R-mediated phagocytosis.cfa04672=Intestinal immune network for IgA production.cfa05140=Leishmaniasis.cfa05143=African trypanosomiasis.cfa05146=Amoebiasis.cfa05150=Staphylococcus aureus 
infection.cfa05152=Tuberculosis.cfa05162=Measles.cfa05310=Asthma.cfa05320=Autoimmune thyroid disease.cfa05322=Systemic lupus erythematosus.cfa05323=Rheumatoid arthritis.cfa05330=Allograft rejection.cfa05340=Primary immunodeficiency.cfa05414=Dilated cardiomyopathy.cfa05416=Viral myocarditis +Contig2_chr7_60049092_60051693 266 chr7 60049361 ENSCAFT00000038176 ENSCAFP00000033857 T 195 M U probably damaging N +Contig31_chr30_24179816_24187402 4867 chr30 24184686 ENSCAFT00000038211 ENSCAFP00000033897 G 103 S U benign N +Contig9_chr27_48250956_48251793 192 chr27 48251161 ENSCAFT00000038256 ENSCAFP00000033944 T 166 M 477739 probably damaging N +Contig45_chr27_43537046_43537944 568 chr27 43537599 ENSCAFT00000038301 ENSCAFP00000033996 M 69 I 611773 benign cfa04010=MAPK signaling pathway.cfa04810=Regulation of actin cytoskeleton.cfa05200=Pathways in cancer.cfa05218=Melanoma +Contig133_chr18_28371600_28372547 83 chr18 28371695 ENSCAFT00000038383 ENSCAFP00000034090 L 102 Q 475933 probably damaging N +Contig11_chr28_8532951_8533892 511 chr28 8533462 ENSCAFT00000038937 ENSCAFP00000034728 R 19 C 477763 probably damaging cfa03008=Ribosome biogenesis in eukaryotes.cfa03013=RNA transport +Contig1_chr14_5733966_5735336 783 chr14 5734754 ENSCAFT00000039094 ENSCAFP00000034905 A 166 T U benign N +Contig48_chr27_6001075_6001818 392 chr27 6001478 ENSCAFT00000039109 ENSCAFP00000034919 R 103 H U probably damaging N +Contig40_chr11_43589173_43590288 973 chr11 43590138 ENSCAFT00000039148 ENSCAFP00000034962 R 1617 P 481557 benign N +Contig1_chr14_30424688_30425258 179 chr14 30424861 ENSCAFT00000039390 ENSCAFP00000035239 T 648 I 475245 benign cfa04666=Fc gamma R-mediated phagocytosis.cfa04810=Regulation of actin cytoskeleton +Contig58_chr8_7461111_7462065 323 chr8 7461423 ENSCAFT00000039451 ENSCAFP00000035309 L 112 F U benign N +Contig1_chr25_43094809_43095852 908 chr25 43095708 ENSCAFT00000039609 ENSCAFP00000035483 W 18 G U unknown N +Contig114_chr25_43076436_43076800 141 chr25 43076581 
ENSCAFT00000039609 ENSCAFP00000035483 S 45 C U unknown N
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_in/sample.wsf Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,402 @@ +#{"column_names":["scaf","pos","A","B","qual","ref","rpos","rnuc","1A","1B","1G","1Q","2A","2B","2G","2Q","3A","3B","3G","3Q","4A","4B","4G","4Q","5A","5B","5G","5Q","6A","6B","6G","6Q","pair","dist", +#"prim","rflp"],"dbkey":"canFam2","individuals":[["PB1",9],["PB2",13],["PB3",17],["PB4",21],["PB6",25],["PB8",29]],"pos":2,"rPos":7,"ref":6,"scaffold":1,"species":"bear"} +Contig161_chr1_4641264_4641879 115 C T 73.5 chr1 4641382 C 6 0 2 45 8 0 2 51 15 0 2 72 5 0 2 42 6 0 2 45 10 0 2 57 Y 54 0.323 0 +Contig48_chr1_10150253_10151311 11 A G 94.3 chr1 10150264 A 1 0 2 30 1 0 2 30 1 0 2 30 3 0 2 36 1 0 2 30 1 0 2 30 Y 22 +99. 0 +Contig20_chr1_21313469_21313570 66 C T 54.0 chr1 21313534 C 4 0 2 39 4 0 2 39 5 0 2 42 4 0 2 39 4 0 2 39 5 0 2 42 N 1 +99. 0 +Contig86_chr1_30984450_30985684 670 C T 365.0 chr1 30985133 C 9 0 2 54 10 0 2 57 13 0 2 66 3 0 2 36 9 0 2 54 7 0 2 48 Y 145 0.031 0 +Contig5_chr1_32562160_32563940 1215 G T 163.0 chr1 32563356 G 17 0 2 78 19 0 2 84 20 0 2 87 14 0 2 69 12 0 2 63 10 0 2 57 Y 17 0.251 0 +Contig110_chr1_33385093_33386888 510 C T 270.0 chr1 33385587 A 14 0 2 69 11 0 2 60 19 0 2 84 11 0 2 60 10 0 2 57 13 0 2 66 Y 13 0.126 0 +Contig100_chr1_33562920_33564288 743 C T 178.0 chr1 33563655 C 6 0 2 45 10 0 2 57 8 0 2 51 5 0 2 42 13 0 2 66 7 0 2 48 Y 13 0.090 3 +Contig7_chr1_37302355_37302489 97 A G 59.2 chr1 37302452 G 3 0 2 36 8 0 2 51 5 0 2 42 8 0 2 51 7 0 2 48 6 0 2 45 N 56 2.812 0 +Contig62_chr1_41880715_41882180 1078 T G 57.6 chr1 41881785 T 14 0 2 69 15 0 2 72 16 0 2 75 13 0 2 66 8 0 2 51 10 0 2 57 Y 21 0.477 0 +Contig47_chr1_48409178_48409384 37 C T 134.0 chr1 48409215 T 5 0 2 42 6 0 2 45 8 0 2 51 9 0 2 54 4 0 2 39 6 0 2 45 N 66 +99. 
0 +Contig119_chr1_49647683_49650077 1618 C A 99.7 chr1 49649276 A 8 0 2 51 11 0 2 60 10 0 2 57 9 0 2 54 10 0 2 57 14 0 2 69 Y 16 0.166 0 +Contig21_chr1_60697952_60699446 307 G A 51.9 chr1 60698265 G 12 0 2 63 9 0 2 54 4 0 2 39 6 0 2 45 9 0 2 54 4 0 2 39 Y 98 0.507 0 +Contig131_chr1_62319542_62320564 169 C G 103.0 chr1 62319709 C 12 0 2 63 12 0 2 66 14 0 2 69 12 0 2 63 9 0 2 54 9 0 2 54 Y 73 0.307 1 +Contig14_chr1_63450425_63450680 101 T A 102.0 chr1 63450530 T 8 0 2 51 10 0 2 57 18 0 2 81 8 0 2 51 8 0 2 34 8 0 2 51 N 99 1.085 0 +Contig83_chr1_63869778_63869942 40 T C 23.7 chr1 63869819 C 5 0 2 42 7 0 2 48 2 0 2 33 4 0 2 39 6 0 2 48 4 0 2 39 N 654 1.364 0 +Contig30_chr1_64702572_64703138 178 A T 117.0 chr1 64702750 T 10 0 2 57 10 0 2 57 20 0 2 87 21 0 2 90 6 0 2 45 12 0 2 63 Y 50 3.872 0 +Contig101_chr1_69868406_69868872 287 G A 14.6 chr1 69868689 G 13 0 2 66 17 0 2 78 10 0 2 57 8 0 2 51 7 0 2 48 8 0 2 51 N 137 0.305 0 +Contig35_chr1_74482577_74482791 170 G A 45.4 chr1 74482751 A 3 0 2 36 4 0 2 39 13 0 2 66 2 0 2 33 5 0 2 42 2 0 2 33 N 20 +99. 3 +Contig49_chr1_83865731_83865944 85 G A 34.1 chr1 -1 N 4 0 2 39 4 0 2 39 8 0 2 51 2 0 2 33 5 0 2 42 4 0 2 39 N -1 1.485 0 +Contig64_chr1_87343284_87345672 163 T A 3.76 chr1 87343443 C 0 2 2 1 0 0 -1 0 5 0 2 42 2 0 2 33 0 1 2 14 0 0 -1 0 N 3 0.039 2 +Contig20_chr1_110679280_110679687 181 C T 87.4 chr1 110679454 - 1 0 2 30 7 0 2 48 4 0 2 39 2 0 2 33 2 0 2 33 0 0 -1 0 N 31 0.660 2 +Contig129_chr1_117547123_117548666 926 G A 126.0 chr1 117548059 G 19 0 2 84 9 0 2 54 11 0 2 60 10 0 2 57 12 0 2 63 11 0 2 60 Y 64 0.049 0 +Contig7_chr1_125154638_125154844 190 G T 130.0 chr1 125154818 A 5 0 2 42 4 0 2 39 7 0 2 48 2 0 2 33 7 0 2 48 4 0 2 39 N 33 +99. 
0 +Contig222_chr2_9817738_9818143 220 C T 888.0 chr2 9817960 C 17 0 2 78 12 0 2 63 20 0 2 87 8 0 2 51 11 0 2 60 12 0 2 63 Y 76 0.093 1 +Contig47_chr2_25470778_25471576 126 G A 888.0 chr2 25470896 G 12 0 2 63 14 0 2 69 14 0 2 69 10 0 2 57 18 0 2 81 13 0 2 66 N 11 0.289 1 +Contig10_chr2_40859744_40860534 637 G A 888.0 chr2 40860397 A 3 0 2 36 3 0 2 36 2 0 2 33 7 0 2 48 6 0 2 45 8 0 2 51 Y 42 1.435 0 +Contig52_chr2_41421981_41422725 604 C A 888.0 chr2 41422583 A 17 0 2 78 18 0 2 81 14 0 2 69 17 0 2 78 12 0 2 63 14 0 2 69 Y 44 0.882 0 +Contig94_chr2_43869105_43870358 220 G A 888.0 chr2 43869333 G 12 0 2 63 18 0 2 81 11 0 2 60 15 0 2 72 12 0 2 63 13 0 2 66 Y 1 0.156 0 +Contig34_chr2_48444129_48444939 695 C T 134.0 chr2 48444828 C 14 0 2 69 8 0 2 51 16 0 2 75 17 0 2 78 9 0 2 54 15 0 2 72 Y 161 0.375 0 +Contig6_chr2_56859179_56859956 671 T C 999.9 chr2 56859851 T 15 0 2 72 18 0 2 81 20 0 2 90 19 0 2 84 19 0 2 84 24 0 2 99 N 28 5.308 1 +Contig115_chr2_61631913_61632510 310 G T 999.3 chr2 61632216 G 7 0 2 48 9 0 2 54 7 0 2 48 11 0 2 60 10 0 2 57 10 0 2 57 N 13 0.184 0 +Contig31_chr2_67331584_67331785 39 C T 999.0 chr2 67331623 C 11 0 2 60 10 0 2 57 7 0 2 48 9 0 2 54 2 0 2 33 4 0 2 39 N 110 0.647 1 +Contig92_chr2_75906683_75907774 773 T C 85.4 chr2 75907438 C 12 0 2 63 12 0 2 63 17 0 2 78 8 0 2 51 8 0 2 51 13 0 2 66 Y 93 0.166 0 +Contig163_chr2_76402959_76404830 221 C T 127.0 chr2 76403181 C 4 0 2 42 10 0 2 57 9 0 2 54 11 0 2 60 7 0 2 48 9 0 2 54 Y 54 0.178 1 +Contig177_chr2_79559305_79560033 168 C T 5.67 chr2 79559476 A 2 0 2 33 3 0 2 36 1 0 2 30 2 0 2 33 0 0 -1 0 1 0 2 30 N 56 0.257 0 +Contig8_chr2_82945728_82945839 61 T C 223.0 chr2 -1 N 2 0 2 33 4 0 2 39 9 0 2 54 3 0 2 36 5 0 2 42 0 0 -1 0 N -1 +99. 
1 +Contig59_chr2_85243022_85243758 506 G A 96.3 chr2 85243509 T 9 0 2 54 11 0 2 60 12 0 2 63 14 0 2 69 10 0 2 57 7 0 2 48 Y 6 0.459 0 +Contig56_chr3_17326225_17327548 387 G C 91.2 chr3 17326591 G 14 0 2 69 13 0 2 66 15 0 2 72 15 0 2 72 13 0 2 66 12 0 2 63 Y 20 0.225 3 +Contig108_chr3_46210055_46210874 367 A G 21.0 chr3 46210423 A 19 0 2 84 10 0 2 57 16 0 2 75 14 0 2 69 20 0 2 87 11 0 2 60 N 236 0.028 1 +Contig16_chr3_47113407_47114449 322 G A 105.0 chr3 47113713 G 13 0 2 66 17 0 2 78 15 0 2 72 6 0 2 45 11 0 2 60 11 0 2 60 Y 114 0.132 5 +Contig3_chr3_47564810_47565251 262 T G 112.0 chr3 47565104 T 14 0 2 69 16 0 2 75 20 0 2 87 10 0 2 57 9 0 2 54 8 0 2 51 Y 24 0.073 1 +Contig35_chr3_49662401_49662929 270 A T 96.1 chr3 49662652 A 14 0 2 69 11 0 2 60 23 0 2 96 13 0 2 66 12 0 2 63 11 0 2 60 Y 36 3.583 2 +Contig97_chr3_49820354_49821631 1069 G A 44.1 chr3 49821402 G 9 0 2 54 9 0 2 54 6 0 2 45 10 0 2 57 5 0 2 42 8 0 2 51 N 6 0.201 2 +Contig1_chr3_51588422_51589409 926 A G 51.0 chr3 51589353 G 2 0 2 33 2 0 2 33 6 0 2 45 4 0 2 39 9 0 2 54 11 0 2 60 N 21 1.147 0 +Contig25_chr3_53260697_53262560 402 G A 211.0 chr3 53261095 G 17 0 2 78 14 0 2 69 15 0 2 75 12 0 2 63 14 0 2 69 12 0 2 63 Y 116 1.033 0 +Contig11_chr3_53992739_53995954 2392 G A 82.4 chr3 53995143 A 12 0 2 66 11 0 2 60 14 0 2 69 6 0 2 45 11 0 2 60 17 0 2 78 Y 358 0.321 1 +Contig236_chr3_72676275_72676473 128 G A 278.0 chr3 72676410 G 12 0 2 63 11 0 2 60 13 0 2 66 10 0 2 57 11 0 2 60 8 0 2 51 N 36 0.496 1 +Contig48_chr3_74792236_74792388 63 T C 111.0 chr3 74792289 - 17 0 2 78 9 0 2 54 9 0 2 54 5 0 2 42 11 0 2 60 9 0 2 54 N -1 3.528 0 +Contig65_chr3_80727952_80728283 39 T C 71.2 chr3 80727990 T 7 0 2 48 3 0 2 36 8 0 2 51 6 0 2 45 8 0 2 51 11 0 2 60 N 22 7.078 0 +Contig53_chr3_86407941_86409349 1406 G A 86.9 chr3 86409317 A 5 0 2 42 5 0 2 42 4 0 2 39 10 0 2 57 8 0 2 51 12 0 2 63 N 14 3.285 1 +Contig13_chr3_92409738_92412300 718 A G 23.3 chr3 92410450 A 12 0 2 63 16 0 2 75 18 0 2 81 13 0 2 66 22 0 2 93 7 0 2 48 Y 23 
0.224 2 +Contig134_chr4_12145648_12148225 1326 C T 164.0 chr4 12146961 C 9 0 2 54 8 0 2 51 7 0 2 48 3 0 2 36 5 0 2 42 5 0 2 42 Y 4 0.080 1 +Contig88_chr4_15557471_15557833 268 A G 145.0 chr4 15557737 A 6 0 2 45 6 0 2 45 11 0 2 60 9 0 2 54 5 0 2 42 6 0 2 45 Y 46 4.138 0 +Contig53_chr4_18823968_18824478 149 A G 91.3 chr4 18824115 A 18 0 2 81 15 0 2 72 21 0 2 90 13 0 2 66 9 0 2 54 12 0 2 63 N 51 0.251 0 +Contig86_chr4_24953866_24956222 1985 C T 76.4 chr4 24955841 T 8 0 2 51 1 0 2 30 3 0 2 36 7 0 2 48 2 0 2 33 6 0 2 45 Y 12 0.357 0 +Contig19_chr4_26233601_26233991 146 G C 51.6 chr4 26233744 G 10 0 2 57 8 0 2 51 9 0 2 54 5 0 2 42 9 0 2 54 4 0 2 39 N 41 0.163 3 +Contig78_chr4_28579975_28580134 30 T G 19.6 chr4 28579994 - 4 0 2 39 3 0 2 36 5 0 2 42 4 0 2 39 2 0 2 33 2 0 2 33 N 33 0.499 0 +Contig16_chr4_30177226_30179725 621 C T 88.4 chr4 30177859 C 20 0 2 87 13 0 2 66 13 0 2 66 11 0 2 60 8 0 2 51 8 0 2 51 Y 45 0.797 1 +Contig30_chr4_46196500_46197672 1045 A C 33.4 chr4 46197522 C 16 0 2 75 9 0 2 54 4 0 2 39 7 0 2 48 14 0 2 69 6 0 2 45 Y 43 0.306 0 +Contig2_chr4_47039007_47039323 158 G C 35.1 chr4 47039160 - 8 0 2 51 9 0 2 54 13 0 2 66 8 0 2 51 10 0 2 60 9 0 2 54 N 0 0.131 0 +Contig17_chr4_61310346_61311158 267 C T 49.9 chr4 61310604 T 10 0 2 57 7 0 2 48 9 0 2 54 10 0 2 57 14 0 2 69 7 0 2 48 Y 219 0.098 0 +Contig26_chr4_64190783_64191295 64 A G 162.0 chr4 64190843 A 10 0 2 57 6 0 2 45 20 0 2 87 12 0 2 63 17 0 2 78 7 0 2 48 Y 306 7.428 0 +Contig11_chr4_65500960_65501654 634 T C 107.0 chr4 65501585 T 13 0 2 66 14 0 2 69 13 0 2 66 13 0 2 66 6 0 2 45 18 0 2 81 Y 10 6.849 0 +Contig38_chr4_67768488_67768982 113 A G 102.0 chr4 67768598 A 9 0 2 54 8 0 2 51 9 0 2 54 11 0 2 60 10 0 2 57 7 0 2 48 Y 188 3.175 0 +Contig30_chr4_70978564_70979580 596 A G 164.0 chr4 70979151 A 15 0 2 72 12 0 2 63 20 0 2 87 14 0 2 69 15 0 2 72 15 0 2 72 Y 111 2.458 2 +Contig72_chr4_74225793_74226492 674 A G 110.0 chr4 74226472 A 5 0 2 42 3 0 2 36 2 0 2 33 3 0 2 36 7 0 2 48 4 0 2 39 Y 115 +99. 
1 +Contig32_chr4_75618955_75620254 301 T C 333.0 chr4 75619257 C 10 0 2 57 8 0 2 51 12 0 2 63 20 0 2 87 12 0 2 63 14 0 2 69 Y 34 0.163 2 +Contig31_chr5_4734956_4736547 1166 C T 133.0 chr5 4736132 C 14 0 2 69 8 0 2 51 17 0 2 78 4 0 2 39 9 0 2 54 12 0 2 63 Y 1 0.021 0 +Contig113_chr5_11052263_11052603 28 C T 38.2 chr5 11052280 C 1 2 1 12 3 2 1 10 5 0 2 42 2 1 2 13 3 0 2 36 8 0 2 51 Y 161 +99. 0 +Contig30_chr5_15698241_15699076 396 G T 76.6 chr5 15698633 T 8 0 2 51 9 0 2 54 10 0 2 57 7 0 2 48 11 0 2 60 8 0 2 54 Y 65 0.009 0 +Contig36_chr5_17709244_17710004 373 T C 281.0 chr5 17709624 T 6 0 2 45 9 0 2 54 7 0 2 48 4 0 2 39 10 0 2 57 4 0 2 39 Y 16 0.131 0 +Contig13_chr5_21881138_21881562 227 A G 251.0 chr5 21881356 A 11 0 2 60 20 0 2 87 22 0 2 93 10 0 2 57 10 0 2 57 21 0 2 90 Y 182 2.013 0 +Contig5_chr5_23188121_23190168 1841 C T 141.0 chr5 23189975 C 20 0 2 87 19 0 2 84 22 0 2 93 16 0 2 75 18 0 2 81 14 0 2 69 N 45 0.355 0 +Contig6_chr5_26899813_26900498 97 A C 88.6 chr5 26899910 A 15 0 2 72 14 0 2 69 27 0 2 108 15 0 2 72 13 0 2 69 12 0 2 63 Y 92 7.370 3 +Contig314_chr5_34019166_34019319 72 C A 20.1 chr5 -1 N 6 0 2 45 9 0 2 54 4 0 2 39 4 0 2 39 9 0 2 54 5 0 2 42 N -1 +99. 4 +Contig147_chr5_38980258_38980559 221 C T 40.8 chr5 38980477 C 15 0 2 72 15 0 2 72 19 0 2 84 10 0 2 57 12 0 2 63 20 0 2 87 Y 11 4.576 0 +Contig115_chr5_48119079_48120169 151 C T 78.3 chr5 48119234 C 17 0 2 78 10 0 2 57 14 0 2 69 16 0 2 75 8 0 2 51 12 0 2 63 Y 205 0.320 0 +Contig45_chr5_50892738_50892968 169 C A 25.8 chr5 50892911 C 10 0 2 57 7 0 2 48 10 0 2 60 6 0 2 45 6 0 2 45 13 0 2 66 N 244 0.497 1 +Contig40_chr5_51484164_51484696 14 A G 53.3 chr5 51484180 A 6 0 2 45 4 0 2 39 4 0 2 39 3 0 2 36 0 0 2 13 3 0 2 36 N 63 +99. 
1 +Contig40_chr5_51664286_51667573 861 C T 148.0 chr5 51665149 C 20 0 2 87 21 0 2 90 20 0 2 87 11 0 2 60 16 0 2 75 15 0 2 72 Y 207 0.080 1 +Contig15_chr5_51889708_51891244 882 A G 149.0 chr5 51890581 G 13 0 2 66 18 0 2 81 17 0 2 78 22 0 2 93 15 0 2 72 22 0 2 93 Y 7 0.025 1 +Contig143_chr5_57231364_57232010 294 T C 78.5 chr5 57231644 T 3 0 2 36 5 0 2 42 4 0 2 39 2 0 2 33 10 0 2 57 6 0 2 45 Y 73 0.337 2 +Contig13_chr5_57609985_57610584 496 C T 50.5 chr5 57610476 C 17 0 2 78 9 0 2 54 6 0 2 45 8 0 2 51 10 0 2 57 12 0 2 63 N 77 2.022 1 +Contig230_chr5_58486998_58487280 227 T C 192.0 chr5 58487232 T 3 0 2 36 4 0 2 39 9 0 2 54 6 0 2 45 4 0 2 39 7 0 2 48 N 24 0.100 2 +Contig385_chr5_60122961_60123128 15 C G 136.0 chr5 60122976 C 0 0 -1 0 0 0 -1 0 1 0 2 30 1 0 2 30 3 0 2 36 0 0 -1 0 N 100 +99. 2 +Contig143_chr5_65121393_65122035 558 C A 127.0 chr5 65121959 A 0 0 -1 0 5 0 2 42 3 0 2 36 4 0 2 39 0 0 -1 0 4 0 2 39 Y 285 0.391 1 +Contig32_chr5_70852360_70853289 282 G A 114.0 chr5 70852623 G 16 0 2 75 11 0 2 60 13 0 2 66 12 0 2 63 13 0 2 66 7 0 2 48 Y 33 0.276 0 +Contig215_chr5_70946445_70947428 363 T G 28.2 chr5 70946809 C 4 0 2 39 0 5 0 12 9 0 2 54 6 0 2 45 3 3 2 1 9 0 2 54 N 43 0.153 0 +Contig100_chr5_71189678_71190590 813 C T 30.8 chr5 71190523 C 11 0 2 60 11 0 2 60 9 0 2 54 10 0 2 57 6 0 2 45 13 0 2 66 Y 8 0.362 1 +Contig45_chr5_76133561_76134403 388 A G 103.0 chr5 76133941 G 3 0 2 36 8 0 2 51 8 0 2 51 5 0 2 42 6 0 2 45 7 0 2 48 Y 57 0.038 0 +Contig61_chr5_90202541_90204393 909 C T 101.0 chr5 90203461 T 7 0 2 48 5 0 2 42 14 0 2 69 3 0 2 36 5 0 2 42 8 0 2 51 Y 64 1.448 0 +Contig111_chr6_5821219_5822519 1060 A G 68.1 chr6 5822321 T 7 0 2 48 6 0 2 45 11 0 2 60 9 0 2 54 3 0 2 36 12 0 2 63 Y 7 0.231 1 +Contig220_chr6_10671338_10672441 999 T C 36.3 chr6 10672322 T 5 0 2 42 11 0 2 60 11 0 2 60 8 0 2 51 5 0 2 42 9 0 2 54 Y 1 1.667 0 +Contig226_chr6_17361986_17362884 418 G C 251.0 chr6 17362406 G 6 0 2 45 8 0 2 51 7 0 2 48 9 0 2 54 7 0 2 48 7 0 2 48 Y 7 0.147 0 
+Contig380_chr6_18173971_18174169 180 C T 4.87 chr6 18174144 T 0 0 -1 0 4 0 2 39 7 0 2 48 2 0 2 33 2 0 2 33 1 0 2 30 N 56 2.589 0 +Contig51_chr6_20231207_20231785 161 A G 70.5 chr6 20231375 G 13 0 2 66 5 0 2 42 8 0 2 51 2 0 2 36 5 0 2 42 5 0 2 42 Y 153 1.754 0 +Contig102_chr6_30271329_30271577 39 T G 139.0 chr6 30271371 G 3 0 2 36 4 0 2 39 6 0 2 45 1 0 2 30 4 0 2 39 4 0 2 39 N 15 1.159 0 +Contig217_chr6_31393824_31394218 97 G A 115.0 chr6 31393921 G 9 0 2 54 19 0 2 84 15 0 2 72 12 0 2 63 7 0 2 48 10 0 2 57 N 45 0.477 0 +Contig186_chr6_31928098_31928245 73 G A 117.0 chr6 -1 N 5 0 2 42 8 0 2 51 2 0 2 33 4 0 2 39 1 0 2 30 5 0 2 42 N -1 0.276 1 +Contig52_chr6_33188498_33188724 123 G A 59.0 chr6 -1 N 5 0 2 42 13 0 2 66 8 0 2 51 4 0 2 39 9 0 2 54 9 0 2 54 N -1 0.880 1 +Contig102_chr6_38743009_38743435 290 A G 178.0 chr6 38743311 A 11 0 2 60 13 0 2 66 9 0 2 54 11 0 2 60 12 0 2 63 13 0 2 66 Y 34 0.148 4 +Contig81_chr6_49018353_49019532 179 C A 72.5 chr6 49018530 A 15 0 2 72 13 0 2 66 19 0 2 72 8 0 2 51 12 0 2 63 16 0 2 75 Y 15 0.145 1 +Contig112_chr6_51024554_51024851 100 A G 121.0 chr6 51024654 A 10 0 2 57 12 0 2 63 9 0 2 54 13 0 2 66 14 0 2 69 17 0 2 78 N 75 4.287 0 +Contig40_chr6_51412751_51413807 227 T C 94.5 chr6 51412975 C 5 0 2 42 8 0 2 51 7 0 2 48 9 0 2 54 11 0 2 60 10 0 2 57 Y 4 5.661 0 +Contig47_chr6_69073222_69074767 1315 T C 212.0 chr6 69074558 T 20 0 2 87 17 0 2 78 18 0 2 81 12 0 2 63 17 0 2 78 7 0 2 48 Y 9 0.652 0 +Contig30_chr6_74848932_74849059 57 C G 46.3 chr6 74848993 C 7 0 2 48 7 0 2 33 6 0 2 45 7 0 2 48 5 0 2 42 6 0 2 45 N -1 +99. 1 +Contig84_chr7_6648683_6650255 1297 G A 110.0 chr7 6649988 G 18 0 2 81 9 0 2 54 22 0 2 77 16 0 2 75 20 0 2 87 6 0 2 45 Y 83 0.166 0 +Contig239_chr7_13007379_13007700 275 A G 39.8 chr7 13007642 A 8 0 2 51 5 0 2 42 8 0 2 51 3 0 2 36 3 0 2 36 5 0 2 42 N 46 1.511 3 +Contig119_chr7_18310707_18310948 23 A T 133.0 chr7 18310729 A 6 0 2 45 5 0 2 42 10 0 2 57 5 0 2 42 2 0 2 33 2 0 2 33 N 4553 +99. 
0 +Contig93_chr7_18513377_18513741 173 T C 130.0 chr7 18513533 C 15 0 2 72 11 0 2 60 18 0 2 81 6 0 2 45 10 0 2 57 14 0 2 69 Y 115 0.174 0 +Contig133_chr7_19603333_19603776 414 C G 31.9 chr7 19603734 G 10 0 2 57 4 0 2 39 4 0 2 39 5 0 2 42 9 0 2 54 9 0 2 54 N 78 +99. 5 +Contig132_chr7_20426224_20428145 1815 A G 28.3 chr7 20428041 A 11 1 2 43 12 0 2 63 19 0 2 84 23 0 2 96 14 0 2 69 10 0 2 57 N 11 0.264 0 +Contig206_chr7_26281823_26282074 103 C A 101.0 chr7 26281925 T 11 0 2 60 16 0 2 61 19 0 2 84 6 0 2 45 19 0 2 84 16 0 2 75 N -1 0.947 1 +Contig116_chr7_45858984_45859111 38 T C 73.2 chr7 -1 N 2 0 2 33 1 0 2 30 3 0 2 36 2 0 2 33 2 0 2 33 1 0 2 30 N -1 3.442 0 +Contig38_chr7_50681997_50682600 42 T C 92.4 chr7 50682037 G 6 0 2 45 2 0 2 33 10 0 2 57 12 0 2 63 5 0 2 42 6 0 2 45 Y 94 0.146 0 +Contig55_chr7_53147505_53148974 894 A G 68.4 chr7 53148397 G 22 0 2 93 13 0 2 66 16 0 2 75 8 0 2 51 16 0 2 75 11 0 2 60 Y 19 0.060 0 +Contig4_chr7_53685534_53688206 1709 C G 76.2 chr7 53687225 C 18 0 2 81 17 0 2 78 18 0 2 81 15 0 2 72 14 0 2 69 14 0 2 69 Y 32 0.659 1 +Contig61_chr7_55832923_55834065 506 T C 185.0 chr7 55833450 C 9 0 2 54 10 0 2 57 22 0 2 93 12 0 2 63 12 0 2 63 7 0 2 48 Y 1 0.019 0 +Contig91_chr8_12804505_12805470 409 C A 111.0 chr8 12804906 C 8 0 2 51 10 0 2 57 15 0 2 72 12 0 2 63 14 0 2 69 15 0 2 72 N 145 0.175 0 +Contig30_chr8_17147743_17147923 13 G A 105.0 chr8 17147756 A 1 3 1 19 1 0 2 30 3 0 2 36 1 0 2 30 1 0 2 30 3 0 2 36 N 6 +99. 
0 +Contig8_chr8_27811135_27812620 333 C T 37.9 chr8 27811458 C 4 0 2 39 11 0 2 60 18 0 2 81 5 0 2 42 6 0 2 45 5 0 2 42 Y 1 0.272 0 +Contig66_chr8_28273102_28273660 175 G C 81.6 chr8 28273263 T 9 0 2 54 17 0 2 78 19 0 2 84 8 0 2 51 16 0 2 75 19 0 2 84 Y 3 2.735 0 +Contig84_chr8_31375511_31376456 443 T C 125.0 chr8 31375954 T 10 0 2 57 15 0 2 72 27 0 2 108 18 0 2 81 16 0 2 75 9 0 2 54 Y 2 0.650 0 +Contig18_chr8_32575859_32577431 264 T C 151.0 chr8 32576124 T 20 0 2 87 14 0 2 69 17 0 2 78 14 0 2 69 13 0 2 66 14 0 2 69 Y 17 0.915 1 +Contig54_chr8_40913908_40916451 1275 G A 175.0 chr8 40915190 G 10 0 2 57 8 0 2 51 11 0 2 60 7 0 2 48 8 0 2 51 9 0 2 54 Y 21 0.056 3 +Contig93_chr8_44658786_44659075 180 T G 55.3 chr8 44658964 T 4 0 2 39 3 0 2 36 6 0 2 45 5 0 2 45 5 0 2 42 4 0 2 39 N 14 0.188 0 +Contig17_chr8_57490059_57490498 69 G T 97.4 chr8 57490127 A 2 0 2 33 11 0 2 60 15 0 2 72 16 0 2 75 8 0 2 51 10 0 2 57 N 40 0.522 5 +Contig66_chr8_58562376_58563446 345 C G 5.74 chr8 58562721 C 14 0 2 69 12 0 2 63 9 0 2 57 10 0 2 57 9 0 2 54 10 0 2 57 Y 6 0.685 0 +Contig44_chr8_71186368_71188207 1455 G T 147.0 chr8 71187818 G 4 10 1 74 3 0 2 36 20 0 2 87 12 0 2 63 8 0 2 51 10 0 2 57 Y 88 0.036 0 +Contig73_chr9_29451535_29452248 616 A G 24.7 chr9 29452127 G 4 0 2 39 7 0 2 48 1 0 2 30 4 0 2 39 7 0 2 48 6 0 2 45 N 49 0.448 4 +Contig96_chr9_39008495_39009278 215 A C 98.7 chr9 39008708 C 7 0 2 48 13 0 2 66 28 0 2 111 16 0 2 75 17 0 2 78 17 0 2 78 Y 8 0.427 1 +Contig22_chr10_15505382_15505589 172 T C 38.5 chr10 15505548 T 2 0 2 33 6 0 2 45 8 0 2 51 8 0 2 51 9 0 2 54 12 0 2 63 N 284 2.861 0 +Contig69_chr10_40547265_40548153 371 G A 58.1 chr10 40547649 A 9 0 2 54 8 0 2 51 8 0 2 51 9 0 2 54 4 0 2 39 5 0 2 42 Y 20 0.138 4 +Contig63_chr10_42716594_42719945 1018 A G 88.7 chr10 42717616 G 13 0 2 66 14 0 2 69 13 0 2 66 12 0 2 63 18 0 2 81 5 0 2 42 Y 25 1.740 0 +Contig22_chr10_43255307_43255570 81 C A 37.2 chr10 43255383 C 15 0 2 72 18 0 2 81 22 0 2 93 16 0 2 75 11 0 2 60 12 0 2 63 N 62 0.450 0 
+Contig9_chr10_51475063_51476054 770 C T 57.3 chr10 51475839 C 6 0 2 45 16 0 2 75 16 0 2 75 13 0 2 66 9 0 2 54 9 2 2 21 N 80 0.394 0 +Contig42_chr10_53816543_53818392 1642 G A 27.5 chr10 53818172 A 7 0 2 48 13 0 2 66 17 0 2 78 14 0 2 69 19 0 2 84 16 0 2 75 N 1 0.433 0 +Contig36_chr10_53992615_53993741 229 G C 86.2 chr10 53992846 G 17 0 2 78 14 0 2 69 13 0 2 66 15 0 2 72 12 0 2 63 15 0 2 72 N 23 1.912 0 +Contig20_chr10_58141129_58141750 575 C T 46.1 chr10 58141701 C 7 0 2 48 8 0 2 51 9 0 2 54 3 0 2 36 4 0 2 39 9 0 2 54 N 1 4.264 0 +Contig26_chr10_59510973_59511899 146 C A 29.0 chr10 59511126 C 8 0 2 51 13 0 2 66 18 0 2 81 13 0 2 66 10 0 2 57 7 0 2 48 Y 208 1.077 0 +Contig72_chr11_7142765_7143772 146 G A 152.0 chr11 7142911 A 8 0 2 51 8 0 2 51 24 0 2 99 10 0 2 57 17 0 2 78 11 0 2 60 Y 90 1.137 0 +Contig103_chr11_8844784_8845095 214 T G 135.0 chr11 8844993 T 1 1 2 12 10 0 2 57 5 4 1 26 2 3 1 13 2 7 1 34 1 1 2 13 Y 75 0.731 0 +Contig9_chr11_9904571_9905983 1284 C T 151.0 chr11 9905857 C 16 0 2 75 19 0 2 84 17 0 2 78 16 0 2 75 12 0 2 63 13 1 2 44 Y 11 0.422 1 +Contig35_chr11_22459883_22460855 714 T G 54.9 chr11 22460577 T 3 0 2 36 1 0 2 30 3 0 2 36 2 0 2 33 2 0 2 33 0 0 -1 0 N 24 0.382 0 +Contig7_chr11_40017076_40017630 352 C T 46.3 chr11 40017422 C 7 0 2 48 9 0 2 54 6 0 2 45 8 0 2 51 16 0 2 75 9 0 2 54 Y 44 0.336 0 +Contig108_chr11_42953408_42955156 367 A G 89.4 chr11 42953779 A 17 0 2 78 11 0 2 60 14 0 2 69 20 0 2 87 14 0 2 69 17 0 2 78 Y 118 0.784 1 +Contig82_chr11_43490732_43490862 60 C T 47.3 chr11 -1 N 0 0 -1 0 0 0 -1 0 1 0 2 30 3 0 2 36 1 1 2 19 1 0 2 30 N -1 6.763 0 +Contig16_chr11_53408448_53408790 187 A G 153.0 chr11 53408638 A 7 0 2 48 9 0 2 54 18 0 2 81 10 0 2 57 11 0 2 60 12 0 2 63 Y 116 1.367 0 +Contig21_chr12_18403415_18404381 586 G T 34.5 chr12 18403983 - 13 0 2 66 16 0 2 75 25 0 2 102 12 0 2 63 12 0 2 63 14 0 2 69 Y 12 0.068 0 +Contig33_chr12_19804073_19804529 178 T C 69.4 chr12 19804261 T 13 0 2 66 13 0 2 66 22 0 2 93 11 0 2 60 12 0 2 63 18 0 2 81 Y 11 
1.571 0 +Contig41_chr12_25565452_25566993 475 G T 6.29 chr12 25565926 G 15 0 2 72 14 0 2 69 10 0 2 57 15 0 2 72 18 0 2 81 19 0 2 84 N 10 2.231 1 +Contig9_chr12_27204351_27204696 239 A G 145.0 chr12 27204587 A 7 0 2 48 8 0 2 51 12 0 2 63 8 0 2 51 11 0 2 60 11 0 2 60 Y 14 0.046 0 +Contig45_chr12_30548282_30550498 448 C T 124.0 chr12 30548703 - 9 0 2 54 11 0 2 60 22 0 2 93 19 0 2 84 12 0 2 63 12 0 2 63 Y 66 0.305 0 +Contig46_chr12_35571846_35572563 58 G C 83.2 chr12 35571906 G 4 0 2 39 10 0 2 57 11 0 2 60 6 0 2 45 10 0 2 57 6 0 2 45 Y 55 +99. 1 +Contig28_chr12_42075871_42076044 136 G A 134.0 chr12 42076006 A 6 0 2 45 5 0 2 42 7 0 2 48 7 0 2 48 2 0 2 33 4 0 2 39 N 3 9.479 0 +Contig16_chr12_42386141_42387454 194 A G 161.0 chr12 42386323 A 11 0 2 60 8 0 2 54 23 0 2 96 17 0 2 78 6 0 2 45 13 0 2 66 Y 7 0.927 1 +Contig42_chr12_44424628_44425829 255 A G 84.4 chr12 44424879 A 12 0 2 63 19 0 2 84 23 0 2 96 15 0 2 72 18 0 2 81 14 0 2 69 Y 18 1.190 2 +Contig10_chr12_44447953_44449698 63 C T 105.0 chr12 44448020 C 11 0 2 60 9 0 2 54 12 0 2 63 10 0 2 57 15 0 2 72 8 0 2 51 Y 31 11.791 0 +Contig5_chr12_53880670_53882675 1221 A C 99.4 chr12 53881888 A 16 0 2 75 18 0 2 81 23 0 2 96 10 0 2 57 15 0 2 72 17 0 2 78 Y 31 0.061 0 +Contig86_chr12_56715356_56716464 818 T C 166.0 chr12 56716164 T 20 0 2 87 16 0 2 75 16 0 2 75 14 0 2 69 13 0 2 66 7 0 2 48 Y 22 1.092 0 +Contig3_chr12_65021967_65024097 238 T G 92.6 chr12 65022205 T 17 0 2 78 14 0 2 69 16 0 2 75 9 0 2 54 13 0 2 66 15 0 2 72 Y 258 0.117 0 +Contig43_chr12_66499742_66500010 121 G T 41.5 chr12 66499866 G 12 0 2 63 4 0 2 39 8 0 2 51 6 0 2 45 10 0 2 57 6 0 2 45 N 42 0.421 0 +Contig14_chr12_71364692_71365311 20 A C 103.0 chr12 71364712 A 7 0 2 48 3 0 2 36 5 0 2 42 1 0 2 30 2 0 2 33 3 0 2 36 Y 35 +99. 
0 +Contig37_chr13_15910164_15910426 245 G A 32.9 chr13 -1 N 3 4 1 41 4 0 2 39 3 0 2 36 4 0 2 39 3 0 2 36 10 0 2 57 N -1 2.159 1 +Contig107_chr13_26045881_26046290 341 C G 81.4 chr13 26046230 C 16 0 2 75 20 0 2 90 14 0 2 69 15 0 2 72 9 0 2 54 9 0 2 54 Y 51 4.510 0 +Contig251_chr13_28498333_28501066 864 T G 296.0 chr13 28499180 T 3 0 2 36 5 0 2 42 4 0 2 39 2 0 2 33 5 0 2 42 6 0 2 45 Y 9 0.068 0 +Contig154_chr13_36777857_36778736 356 G A 95.5 chr13 36778225 A 6 0 2 45 11 0 2 60 11 0 2 60 9 0 2 54 13 0 2 66 8 0 2 51 Y 59 0.192 0 +Contig37_chr13_42529793_42530857 150 G T 192.0 chr13 42529926 G 18 0 2 81 14 0 2 69 16 0 2 75 14 0 2 69 8 0 2 51 11 0 2 60 N 22 0.795 5 +Contig47_chr13_47045833_47046626 257 A C 28.5 chr13 47046097 A 13 0 2 66 10 0 2 57 17 0 2 78 20 0 2 87 15 0 2 72 9 0 2 57 N 129 0.468 0 +Contig42_chr13_47730018_47730856 254 A G 75.1 chr13 47730294 A 13 0 2 66 6 0 2 45 12 0 2 63 9 0 2 54 16 0 2 75 11 0 2 63 Y 630 0.049 1 +Contig55_chr13_53467708_53468101 221 T G 132.0 chr13 53467925 T 25 0 2 102 12 0 2 63 26 0 2 105 7 0 2 48 16 0 2 75 16 0 2 75 N 20 5.717 1 +Contig49_chr13_55103679_55105532 503 G A 76.0 chr13 55104178 G 21 0 2 90 19 0 2 84 18 0 2 81 20 0 2 87 8 9 1 89 17 0 2 78 Y 20 0.259 1 +Contig42_chr13_64785759_64786045 14 C G 22.8 chr13 64785772 C 2 0 2 33 2 0 2 33 4 0 2 39 7 0 2 48 8 0 2 51 2 0 2 33 N 527 +99. 
1 +Contig66_chr13_66021813_66022244 319 C T 125.0 chr13 66022136 C 11 0 2 60 16 0 2 75 15 0 2 75 12 0 2 63 17 0 2 78 8 0 2 51 N 14 0.055 3 +Contig48_chr14_11839435_11843272 3014 A G 163.0 chr14 11842446 A 10 0 2 57 8 0 2 51 13 0 2 66 10 0 2 57 5 0 2 42 10 0 2 57 Y 31 0.908 0 +Contig9_chr14_23353717_23354432 80 G A 61.3 chr14 23353797 G 3 0 2 36 6 0 2 45 11 0 2 60 8 0 2 51 4 0 2 39 2 4 1 35 Y 11 0.444 0 +Contig14_chr14_24131180_24133488 1633 G A 131.0 chr14 24132818 G 21 0 2 90 16 0 2 75 12 0 2 63 10 0 2 57 11 0 2 60 20 0 2 87 Y 36 0.347 0 +Contig28_chr14_26905747_26909514 975 G C 3.13 chr14 26906723 G 16 0 2 75 10 0 2 57 12 0 2 63 15 0 2 72 10 0 2 57 7 0 2 48 N 287 0.117 2 +Contig14_chr14_29616948_29618316 109 G A 80.3 chr14 29617053 - 17 0 2 78 16 0 2 75 16 0 2 75 10 0 2 57 17 0 2 78 19 0 2 84 Y 32 1.051 0 +Contig24_chr14_29728478_29728839 242 T A 107.0 chr14 29728724 T 2 0 2 33 12 0 2 63 10 0 2 57 12 0 2 63 5 0 2 42 9 0 2 54 N 70 2.712 0 +Contig76_chr14_30028102_30029179 1046 C T 38.5 chr14 30029169 T 3 0 2 36 6 0 2 45 9 0 2 54 7 0 2 48 9 0 2 54 8 0 2 51 Y 96 +99. 0 +Contig115_chr14_31417207_31417574 259 A G 12.1 chr14 31417454 G 13 0 2 66 15 0 2 72 21 0 2 90 12 0 2 63 13 0 2 66 9 0 2 54 N 28 5.379 2 +Contig70_chr14_46653662_46653790 111 G A 46.7 chr14 46653768 G 7 0 2 48 5 0 2 42 11 0 2 60 11 0 2 60 8 0 2 51 10 0 2 57 N 21 +99. 
2 +Contig43_chr14_49991855_49993511 918 A G 112.0 chr14 49992767 G 15 0 2 72 10 0 2 57 11 0 2 63 9 0 2 54 12 0 2 63 9 0 2 54 Y 6 0.314 1 +Contig64_chr14_56768376_56768902 473 C T 29.0 chr14 56768832 C 15 0 2 72 11 0 2 60 14 0 2 69 14 0 2 69 7 0 2 48 9 0 2 54 Y 91 8.281 0 +Contig60_chr15_18493036_18494316 150 G A 92.6 chr15 18493188 G 9 0 2 54 13 0 2 66 9 0 2 54 6 0 2 45 5 0 2 42 12 0 2 63 Y 45 0.125 0 +Contig213_chr15_19567788_19568626 196 A C 13.9 chr15 19567992 A 4 0 2 39 2 0 2 33 7 0 2 48 4 0 2 39 4 0 2 39 6 0 2 45 Y 111 0.043 0 +Contig59_chr15_22138344_22138535 120 G C 142.0 chr15 22138470 C 11 0 2 60 10 0 2 57 18 0 2 81 4 0 2 39 10 0 2 57 15 0 2 72 N 8 2.553 0 +Contig112_chr15_26772864_26773267 374 C T 21.6 chr15 26773244 C 4 0 2 39 4 0 2 39 5 0 2 42 2 0 2 33 4 0 2 39 3 0 2 36 N 18 +99. 0 +Contig24_chr15_26894765_26895003 155 G A 87.6 chr15 -1 N 6 0 2 45 5 0 2 42 7 0 2 48 4 0 2 39 4 0 2 39 2 0 2 33 N -1 0.178 0 +Contig2_chr15_33944796_33947182 1860 G A 99.5 chr15 33946654 G 10 0 2 57 11 0 2 60 16 0 2 75 14 0 2 69 14 0 2 69 16 0 2 75 Y 16 0.252 0 +Contig73_chr15_34690052_34691332 714 T C 130.0 chr15 34690769 T 7 0 2 48 7 0 2 48 17 0 2 78 9 0 2 54 9 0 2 54 4 0 2 39 Y 7 6.003 0 +Contig68_chr15_37747190_37747426 126 G A 130.0 chr15 37747331 G 14 0 2 69 14 0 2 69 11 0 2 63 19 0 2 84 13 0 2 66 21 0 2 90 N 229 0.255 0 +Contig35_chr15_41400484_41400672 160 A C 143.0 chr15 -1 N 1 0 2 30 2 0 2 33 0 0 -1 0 2 0 2 33 3 0 2 36 2 0 2 33 N -1 +99. 0 +Contig104_chr15_45106954_45107158 70 A T 64.4 chr15 45107015 A 6 0 2 45 6 0 2 45 19 0 2 84 7 0 2 48 7 0 2 48 3 0 2 36 N 202 4.319 0 +Contig119_chr16_6160274_6160477 180 G A 54.8 chr16 6160457 G 7 0 2 48 6 0 2 45 12 0 2 63 3 0 2 36 11 0 2 60 10 0 2 57 N 42 +99. 0 +Contig126_chr16_10611887_10612152 150 G T 145.0 chr16 10612037 G 14 0 2 69 9 0 2 54 11 0 2 63 8 0 2 51 8 0 2 51 11 0 2 60 N 15 0.104 6 +Contig114_chr16_12565220_12565676 10 G A 134.0 chr16 12565230 G 0 0 -1 0 2 0 2 33 2 0 2 33 0 0 -1 0 1 0 2 30 1 0 2 30 N 333 +99. 
0 +Contig43_chr16_20200090_20200514 70 A G 58.6 chr16 20200154 A 11 0 2 60 15 0 2 72 15 0 2 72 6 0 2 45 9 0 2 54 12 0 2 63 Y 2 0.466 1 +Contig60_chr16_28079136_28080263 588 T G 157.0 chr16 28079739 T 22 0 2 93 20 0 2 87 22 0 2 93 17 0 2 78 12 0 2 63 10 0 2 57 Y 105 5.999 1 +Contig70_chr16_33758668_33759655 104 A T 58.1 chr16 33758772 A 6 0 2 45 7 0 2 48 17 0 2 78 14 0 2 69 8 0 2 51 10 0 2 57 N 54 0.162 0 +Contig66_chr16_37935682_37935831 116 T C 99.2 chr16 37935802 C 12 0 2 63 6 0 2 45 19 0 2 84 12 0 2 63 13 0 2 66 17 0 2 78 N 266 +99. 2 +Contig16_chr16_40451506_40451643 84 A G 59.8 chr16 40451592 A 7 0 2 48 5 0 2 42 7 0 2 48 13 0 2 66 14 0 2 69 19 0 2 84 N 45 5.061 0 +Contig53_chr16_49888293_49888587 260 G A 108.0 chr16 49888550 A 4 0 2 39 1 0 2 30 3 0 2 36 5 0 2 42 2 0 2 33 2 0 2 33 Y 9 0.261 1 +Contig31_chr17_12128267_12129637 205 G A 90.5 chr17 12128484 G 7 0 2 48 6 0 2 45 6 0 2 45 11 0 2 60 7 0 2 48 4 0 2 39 Y 10 0.246 0 +Contig50_chr17_12247973_12249183 889 G T 47.6 chr17 12248878 G 0 1 2 9 8 0 2 51 9 2 2 21 7 2 2 21 15 0 2 72 0 3 0 9 Y 1 1.181 0 +Contig1_chr17_12979232_12980380 808 G T 12.3 chr17 12980028 G 18 0 2 81 12 0 2 63 21 0 2 90 13 0 2 66 22 0 2 93 18 0 2 81 Y 9 0.336 1 +Contig63_chr17_14186372_14186928 54 C T 70.7 chr17 14186427 C 6 0 2 45 2 0 2 33 5 0 2 42 6 0 2 45 3 0 2 36 3 0 2 36 Y 11 0.560 3 +Contig42_chr17_23434859_23438330 2100 C T 39.5 chr17 23436985 T 4 0 2 39 7 0 2 48 7 0 2 48 3 0 2 36 6 0 2 45 2 0 2 33 Y 25 0.344 0 +Contig63_chr17_23796320_23796814 220 A G 54.0 chr17 23796536 G 6 0 2 45 4 0 2 39 5 0 2 42 6 0 2 45 4 0 2 39 6 0 2 45 Y 139 0.067 1 +Contig76_chr17_24107434_24107834 316 T C 141.0 chr17 24107726 T 19 0 2 84 15 0 2 72 20 0 2 87 16 0 2 75 11 0 2 60 18 0 2 81 Y 30 0.175 2 +Contig99_chr17_26021506_26022200 505 C T 88.8 chr17 26022017 T 15 0 2 72 13 0 2 66 19 0 2 84 9 0 2 54 10 0 2 57 11 0 2 60 Y 1 0.172 1 +Contig59_chr17_26790302_26795045 287 C T 45.1 chr17 26790582 C 8 0 2 51 6 0 2 45 13 0 2 66 6 0 2 45 15 0 2 72 12 0 2 63 Y 75 
0.019 1 +Contig99_chr17_27018324_27019378 446 G A 31.1 chr17 27018776 G 14 0 2 69 12 0 2 63 14 0 2 69 10 0 2 57 9 0 2 54 11 0 2 60 Y 13 0.290 4 +Contig125_chr17_27739115_27739410 63 G A 107.0 chr17 27739177 G 8 0 2 51 11 0 2 60 16 0 2 75 8 0 2 51 4 0 2 39 15 0 2 72 N 100 0.819 0 +Contig115_chr17_37489899_37490101 159 G A 62.4 chr17 37490067 G 4 0 2 39 3 0 2 36 4 0 2 39 4 0 2 39 3 0 2 36 6 0 2 45 N 4 1.411 1 +Contig180_chr17_45154356_45154925 524 A G 146.0 chr17 45154886 G 7 0 2 48 9 0 2 54 7 0 2 48 9 0 2 54 4 0 2 39 8 0 2 51 Y 11 +99. 2 +Contig61_chr17_48221795_48223545 1404 T A 177.0 chr17 48223216 T 15 0 2 72 14 0 2 69 24 0 2 99 17 0 2 78 18 0 2 81 24 0 2 99 Y 161 0.633 2 +Contig27_chr17_61713766_61716585 1056 G C 40.0 chr17 61714821 G 4 0 2 39 8 0 2 51 10 0 2 57 6 0 2 45 6 0 2 45 3 0 2 36 N 6 2.200 4 +Contig229_chr18_3706523_3708577 1076 A G 83.9 chr18 3707630 A 11 0 2 60 13 0 2 66 26 0 2 105 11 0 2 60 15 0 2 72 17 0 2 78 Y 63 0.445 0 +Contig24_chr18_14049894_14050480 24 A G 123.0 chr18 14049918 A 5 0 2 42 5 0 2 42 4 0 2 39 6 0 2 45 7 0 2 48 5 0 2 42 Y 17 +99. 
0 +Contig30_chr18_18771753_18772121 39 C G 48.5 chr18 18771787 C 2 0 2 33 5 0 2 42 2 0 2 33 6 0 2 45 3 0 2 36 2 0 2 33 N 5 0.135 0 +Contig123_chr18_19916160_19916379 116 G A 79.2 chr18 19916272 A 14 0 2 69 12 0 2 63 14 0 2 69 6 0 2 45 11 0 2 60 10 0 2 57 N 26 0.172 0 +Contig82_chr18_27305489_27306229 566 C T 49.5 chr18 27306051 A 6 0 2 45 6 0 2 45 10 0 2 57 11 0 2 60 6 0 2 45 7 0 2 48 N 1 0.349 0 +Contig71_chr18_34324706_34326687 136 G A 151.0 chr18 34324841 G 9 0 2 54 9 0 2 54 17 0 2 78 8 0 2 51 11 0 2 60 10 0 2 57 Y 2 2.129 2 +Contig16_chr18_34672093_34673044 538 T C 58.2 chr18 34672635 T 8 0 2 51 15 0 2 72 16 0 2 75 15 0 2 72 9 0 2 57 18 0 2 81 Y 8 0.214 1 +Contig96_chr18_38492535_38493333 624 G A 119.0 chr18 38493162 T 17 0 2 78 12 0 2 63 13 0 2 66 16 0 2 75 8 0 2 51 15 0 2 72 Y 127 0.131 0 +Contig226_chr18_47753756_47754666 427 T C 21.1 chr18 47754215 T 10 0 2 57 4 0 2 39 8 0 2 51 5 0 2 42 6 0 2 45 7 0 2 48 Y 42 0.522 0 +Contig170_chr18_49411558_49412230 94 C A 74.3 chr18 49411655 C 14 0 2 69 10 0 2 57 9 0 2 54 10 0 2 57 3 0 2 36 3 0 2 36 N 9 1.457 0 +Contig192_chr18_49419342_49420737 1058 C T 42.8 chr18 49420381 A 3 0 2 36 4 0 2 39 5 0 2 42 8 0 2 51 3 0 2 36 3 0 2 36 Y 34 2.107 2 +Contig64_chr18_55979770_55980315 49 G A 89.1 chr18 55979824 G 3 0 2 36 9 0 2 54 7 0 2 51 4 0 2 39 3 0 2 36 3 0 2 36 Y -1 2.124 0 +Contig20_chr18_58130301_58130735 112 A G 74.4 chr18 58130413 A 12 0 2 66 11 0 2 60 11 0 2 60 12 0 2 63 6 0 2 45 6 0 2 45 Y 10 0.290 0 +Contig146_chr19_5221790_5223013 143 A G 114.0 chr19 5221916 - 1 0 2 30 4 0 2 39 3 0 2 36 5 0 2 42 2 0 2 33 5 0 2 42 Y 12 0.870 0 +Contig13_chr19_7739961_7740118 26 C G 220.0 chr19 -1 N 3 0 2 36 1 0 2 30 2 0 2 33 3 0 2 36 1 0 2 30 2 0 2 33 N -1 +99. 
0 +Contig67_chr19_12398520_12399367 499 C T 161.0 chr19 12399017 C 10 0 2 57 11 0 2 60 20 0 2 87 14 0 2 69 24 0 2 99 8 0 2 51 Y 137 5.634 0 +Contig66_chr19_16285672_16287223 996 C T 190.0 chr19 16286674 C 9 0 2 57 14 0 2 69 16 0 2 78 17 0 2 78 8 0 2 51 22 0 2 93 Y 40 0.110 0 +Contig129_chr19_25541958_25542221 202 T C 68.1 chr19 25542154 C 11 0 2 60 19 0 2 84 10 0 2 60 17 0 2 78 9 0 2 54 12 0 2 63 N -1 2.551 1 +Contig152_chr19_34274440_34275622 1072 C T 48.0 chr19 34275509 T 1 0 2 30 2 0 2 33 1 0 2 30 1 0 2 30 4 0 2 39 5 0 2 42 N 71 0.309 0 +Contig29_chr19_37339947_37341911 1692 C T 211.0 chr19 37341631 C 15 0 2 72 20 0 2 87 11 0 2 60 15 0 2 72 3 0 2 36 12 0 2 63 Y 7 0.096 0 +Contig39_chr19_47709708_47711327 444 C T 36.8 chr19 47710148 T 10 0 2 57 4 0 2 39 8 0 2 51 9 0 2 54 6 0 2 45 6 0 2 45 Y 95 1.251 1 +Contig60_chr19_54013816_54014398 281 A G 138.0 chr19 54014103 C 6 0 2 45 15 0 2 72 7 0 2 48 10 0 2 57 15 0 2 72 10 0 2 57 Y 188 1.271 0 +Contig251_chr19_56559098_56559626 452 T C 3.36 chr19 56559549 T 12 0 2 63 13 0 2 66 21 0 2 90 15 0 2 72 14 0 2 69 11 0 2 60 N 1 0.117 0 +Contig50_chr20_12138509_12141975 3206 C A 248.0 chr20 12141763 C 8 0 2 51 15 0 2 72 14 0 2 69 6 0 2 45 10 0 2 57 7 0 2 48 Y 2 0.384 0 +Contig36_chr20_32631363_32632049 176 G A 24.1 chr20 32631526 G 7 0 2 48 14 0 2 69 19 0 2 84 14 0 2 69 15 0 2 72 16 0 2 75 N 50 1.150 0 +Contig39_chr20_36316398_36316498 57 C T 30.3 chr20 36316455 C 2 0 2 33 0 1 2 8 0 0 -1 0 0 1 2 10 0 0 -1 0 0 0 -1 0 N -483 +99. 
0 +Contig32_chr20_36468058_36468869 66 C T 40.4 chr20 36468127 C 6 0 2 45 3 0 2 36 4 0 2 39 5 0 2 42 3 0 2 36 4 0 2 39 N 59 0.281 0 +Contig24_chr20_38203888_38204900 834 C T 132.0 chr20 38204731 C 9 0 2 54 17 0 2 78 20 0 2 87 8 0 2 51 11 0 2 60 17 0 2 78 Y 14 0.397 0 +Contig79_chr20_44263127_44264103 456 G T 31.5 chr20 44263573 G 22 0 2 93 16 0 2 75 15 0 2 72 19 0 2 84 13 0 2 66 26 0 2 105 Y 8 3.250 0 +Contig26_chr20_45878482_45878787 197 A G 160.0 chr20 45878672 A 17 0 2 78 15 0 2 72 11 0 2 63 17 0 2 78 12 0 2 63 10 0 2 57 N 14 0.535 0 +Contig119_chr20_46550670_46551383 609 G A 139.0 chr20 46551277 G 7 0 2 48 17 0 2 78 19 0 2 84 20 0 2 87 9 0 2 54 15 0 2 72 Y 7 0.488 1 +Contig50_chr21_4178523_4178687 121 G A 362.0 chr21 4178640 G 8 0 2 51 14 0 2 69 5 0 2 42 3 0 2 36 11 0 2 60 4 0 2 39 N 392 0.483 0 +Contig103_chr21_10177255_10177765 121 G A 125.0 chr21 10177367 G 12 0 2 63 10 0 2 57 10 0 2 57 17 0 2 78 14 0 2 69 7 0 2 51 Y 37 0.213 3 +Contig1_chr21_10805534_10806399 766 A G 146.0 chr21 10806301 G 10 0 2 57 6 0 2 45 9 0 2 54 6 0 2 45 7 0 2 48 5 0 2 42 Y 20 0.319 0 +Contig46_chr21_21029492_21030645 443 C T 5.37 chr21 21029910 C 15 0 2 72 11 0 2 60 16 0 2 75 15 0 2 72 13 0 2 66 6 0 2 45 Y 96 3.737 0 +Contig129_chr21_31045749_31046924 381 A G 129.0 chr21 31046141 A 19 0 2 84 8 0 2 51 23 0 2 96 12 0 2 63 15 0 2 72 18 0 2 81 Y 69 0.028 2 +Contig23_chr21_31651123_31651986 840 C T 71.3 chr21 31651957 T 6 0 2 45 9 0 2 54 8 0 2 51 10 0 2 57 4 0 2 39 7 0 2 48 Y 105 2.977 3 +Contig64_chr21_43341847_43342031 84 T C 114.0 chr21 43341926 T 11 0 2 60 9 0 2 54 10 0 2 57 6 0 2 45 6 0 2 45 7 0 2 48 N 10 3.954 2 +Contig60_chr21_43475347_43475824 175 C T 8.05 chr21 43475551 T 6 0 2 45 7 0 2 48 13 0 2 66 6 0 2 45 14 0 2 69 14 0 2 69 N 45 0.058 0 +Contig64_chr21_45377513_45377872 19 C T 60.7 chr21 -1 N 3 0 2 36 2 0 2 33 1 0 2 30 0 0 -1 0 3 0 2 36 1 0 2 30 N -1 +99. 
1 +Contig159_chr22_7896450_7896974 109 G C 151.0 chr22 7896570 G 16 0 2 75 5 7 1 62 14 0 2 69 16 0 2 75 13 0 2 66 13 0 2 66 Y 16 0.465 0 +Contig46_chr22_9416920_9417467 381 G A 145.0 chr22 9417259 G 10 0 2 57 9 0 2 54 10 0 2 57 6 0 2 45 13 0 2 66 7 0 2 48 Y 154 0.242 0 +Contig86_chr22_9440787_9441725 713 T G 119.0 chr22 9441488 G 6 0 2 45 12 0 2 63 10 0 2 57 11 0 2 60 13 0 2 66 16 0 2 75 Y 132 0.218 0 +Contig16_chr22_15636960_15637372 236 A C 9.79 chr22 15637192 T 4 0 2 39 5 0 2 42 12 0 2 63 7 0 2 48 6 0 2 45 11 0 2 60 Y 5 2.163 0 +Contig4_chr22_16114310_16114546 128 G C 101.0 chr22 16114432 G 10 0 2 57 13 0 2 66 20 0 2 87 20 0 2 87 16 0 2 75 9 0 2 54 N 19 0.526 0 +Contig23_chr22_34612023_34612568 167 C G 92.3 chr22 34612181 C 11 0 2 60 18 0 2 81 13 0 2 66 8 0 2 51 12 0 2 63 14 0 2 69 Y 7 0.409 0 +Contig4_chr22_38252245_38253712 799 A C 159.0 chr22 38253064 A 18 0 2 81 15 0 2 72 15 0 2 72 20 0 2 87 27 0 2 108 15 0 2 72 Y 90 4.330 0 +Contig122_chr22_48412466_48414788 1888 C T 125.0 chr22 48414355 T 16 0 2 75 15 0 2 72 16 0 2 75 14 0 2 72 12 0 2 63 7 0 2 48 N 42 0.122 0 +Contig77_chr22_49764414_49764875 353 C A 148.0 chr22 49764777 C 7 4 1 65 18 0 2 81 16 0 2 75 20 0 2 87 4 3 1 52 9 4 1 67 Y 12 0.941 0 +Contig26_chr22_57817664_57819633 1453 A G 150.0 chr22 57819121 G 9 0 2 54 9 0 2 54 13 0 2 66 15 0 2 72 11 0 2 60 14 0 2 69 N 15 0.471 1 +Contig348_chr22_62406104_62406495 189 C A 134.0 chr22 62406302 A 9 0 2 54 14 0 2 69 11 0 2 60 10 0 2 57 12 0 2 63 6 0 2 45 Y 5 0.912 0 +Contig133_chr23_3525134_3526502 1223 A G 201.0 chr23 3526387 A 11 0 2 60 13 0 2 66 23 0 2 96 21 0 2 90 13 0 2 66 10 0 2 57 Y 61 1.359 0 +Contig111_chr23_7058063_7058181 107 G A 108.0 chr23 7058162 A 8 0 2 51 8 0 2 51 7 0 2 48 2 0 2 33 5 0 2 42 6 0 2 45 N 3 +99. 
0 +Contig79_chr23_7844129_7844837 110 C A 141.0 chr23 7844237 T 13 0 2 66 15 0 2 72 17 0 2 78 12 0 2 63 15 0 2 72 16 0 2 75 Y 40 0.339 0 +Contig38_chr23_9201002_9201725 597 C T 155.0 chr23 9201609 T 17 0 2 78 8 0 2 51 13 0 2 66 5 0 2 42 11 0 2 60 7 0 2 48 Y 167 0.633 1 +Contig33_chr23_20672540_20674320 347 T A 91.4 chr23 20672885 A 11 0 2 60 14 0 2 69 15 0 2 72 7 0 2 48 12 0 2 63 18 0 2 81 Y 31 0.452 1 +Contig35_chr23_28447813_28449115 70 T A 21.3 chr23 28447881 T 9 0 2 54 8 0 2 51 10 0 2 57 9 0 2 54 10 0 2 57 12 0 2 63 N 251 0.163 1 +Contig51_chr23_30590939_30591162 140 C T 142.0 chr23 30591080 C 14 0 2 69 4 0 2 39 10 0 2 57 12 0 2 63 14 0 2 69 4 0 2 39 N 13 1.658 0 +Contig57_chr23_32216351_32216721 179 T G 143.0 chr23 32216534 T 15 0 2 72 15 0 2 72 23 0 2 96 13 0 2 66 16 0 2 75 15 0 2 72 N 32 1.387 1 +Contig93_chr23_35744841_35745791 40 A T 30.4 chr23 35744880 T 6 0 2 45 7 0 2 48 7 0 2 48 2 0 2 33 5 0 2 42 5 0 2 42 Y 50 2.173 0 +Contig99_chr23_42543966_42544147 14 G A 357.0 chr23 42543980 G 4 0 2 39 2 0 2 33 3 0 2 36 3 0 2 36 1 0 2 30 2 0 2 33 N 69 +99. 0 +Contig32_chr23_48285289_48286638 186 T C 176.0 chr23 48285470 T 18 0 2 81 12 0 2 63 16 0 2 75 13 0 2 66 9 0 2 54 9 0 2 54 Y 4 4.238 1 +Contig50_chr24_22515247_22516072 761 C T 243.0 chr24 22515981 T 11 0 2 60 10 0 2 57 8 0 2 51 9 0 2 54 18 0 2 81 8 0 2 51 Y 1 0.190 0 +Contig92_chr24_28935897_28936321 13 G A 47.1 chr24 -1 N 2 0 2 33 1 0 2 30 0 0 -1 0 0 0 -1 0 1 0 2 30 0 0 -1 0 Y -1 +99. 
2 +Contig84_chr24_29196623_29199644 466 C T 126.0 chr24 29197091 T 7 0 2 48 11 0 2 60 8 0 2 51 7 0 2 48 11 0 2 60 15 0 2 72 Y 42 0.215 0 +Contig35_chr24_30150986_30151507 492 A C 114.0 chr24 30151448 A 5 0 2 42 2 0 2 33 2 0 2 33 3 0 2 36 3 0 2 36 5 0 2 42 N 41 2.587 6 +Contig61_chr24_30465488_30465834 149 G T 68.2 chr24 30465637 G 13 0 2 66 4 2 2 11 18 0 2 81 11 0 2 60 11 0 2 60 9 0 2 54 N 99 0.105 2 +Contig145_chr24_34778364_34778898 163 T C 372.0 chr24 34778541 C 10 0 2 57 8 0 2 51 12 0 2 63 12 0 2 63 6 1 2 31 7 0 2 48 Y 40 0.037 0 +Contig34_chr24_36147443_36150244 2679 C T 140.0 chr24 36150125 C 13 0 2 66 7 0 2 48 14 0 2 69 14 0 2 69 10 0 2 57 13 0 2 66 N 282 0.099 1 +Contig164_chr24_46598127_46599206 84 C T 105.0 chr24 46598214 C 13 0 2 66 12 0 2 63 15 0 2 72 15 0 2 72 11 0 2 60 8 0 2 51 Y 22 1.262 1 +Contig144_chr25_4011170_4013134 541 A G 160.0 chr25 4011690 A 12 0 2 63 17 0 2 78 13 0 2 66 13 0 2 66 13 0 2 66 13 0 2 66 Y 5 0.087 0 +Contig81_chr25_6103472_6104760 699 G A 378.0 chr25 6104190 A 14 0 2 69 16 0 2 75 13 0 2 66 11 0 2 60 11 0 2 60 12 0 2 63 Y 33 0.789 2 +Contig152_chr25_7486442_7487609 75 A G 11.6 chr25 7486515 A 17 0 2 78 13 0 2 66 8 0 2 51 16 0 2 75 8 0 2 51 6 0 2 45 N 2 0.158 0 +Contig24_chr25_7695778_7698612 2714 C T 130.0 chr25 7698446 C 16 0 2 75 13 0 2 66 22 0 2 93 17 0 2 78 10 0 2 57 17 0 2 78 Y 27 0.346 0 +Contig89_chr25_8635170_8636009 586 G C 209.0 chr25 8635744 G 13 0 2 66 13 0 2 66 21 0 2 93 14 0 2 69 15 0 2 72 15 0 2 72 Y 14 0.067 0 +Contig77_chr25_10796299_10796481 2 T C 17.3 chr25 -1 N 1 0 2 30 0 0 -1 0 1 0 2 30 0 0 -1 0 0 0 -1 0 0 0 -1 0 N -1 +99. 0 +Contig73_chr25_14177327_14177474 125 A C 6.85 chr25 14177464 A 0 0 -1 0 0 0 -1 0 0 0 -1 0 0 0 -1 0 0 0 -1 0 0 0 -1 0 N 27 +99. 
1 +Contig59_chr25_18196776_18197707 785 G A 112.0 chr25 18197551 G 8 10 1 42 27 0 2 108 21 0 2 90 18 0 2 81 10 0 2 57 14 0 2 69 N 36 3.625 0 +Contig103_chr25_38891221_38892140 407 G A 131.0 chr25 38891644 G 8 0 2 51 14 0 2 69 18 0 2 81 8 0 2 51 8 0 2 51 11 0 2 60 Y 149 0.167 4 +Contig84_chr25_42407960_42408708 55 C T 119.0 chr25 42408013 C 6 0 2 45 9 0 2 54 11 0 2 60 9 0 2 54 7 0 2 48 8 0 2 51 Y 11 0.121 0 +Contig73_chr25_43562500_43564110 955 T C 52.1 chr25 43563469 C 9 0 2 57 4 0 2 39 6 0 2 45 5 0 2 42 7 0 2 48 10 0 2 57 Y 4 1.406 0 +Contig37_chr25_51074433_51074885 170 A G 102.0 chr25 51074589 G 11 0 2 60 7 0 2 48 6 0 2 45 15 0 2 72 9 0 2 54 7 0 2 48 Y 68 0.207 1 +Contig204_chr26_4311195_4311778 170 C T 16.9 chr26 4311363 T 20 0 2 87 8 0 2 51 13 0 2 66 18 0 2 81 11 0 2 60 14 0 2 69 N 35 0.085 0 +Contig122_chr26_7622321_7623491 106 C G 139.0 chr26 7622423 C 3 0 2 36 9 0 2 54 10 0 2 57 12 0 2 63 9 0 2 54 5 0 2 42 N 19 0.458 0 +Contig11_chr26_11062142_11062902 707 C A 108.0 chr26 11062836 T 7 0 2 48 8 0 2 51 16 0 2 75 10 0 2 57 6 0 2 45 14 0 2 69 Y -1 4.709 0 +Contig133_chr26_17695661_17696368 39 T G 98.7 chr26 17695700 T 10 0 2 57 3 0 2 36 11 0 2 60 9 0 2 54 2 0 2 33 1 0 2 30 N 85 3.402 0 +Contig157_chr26_23894107_23895229 25 C T 50.2 chr26 23894140 C 0 0 -1 0 4 0 2 39 2 0 2 33 4 0 2 39 3 0 2 36 3 0 2 36 Y 51 +99. 
0 +Contig146_chr26_26622638_26623906 574 G A 186.0 chr26 26623219 A 11 0 2 60 12 0 2 63 9 0 2 54 11 0 2 60 9 0 2 54 12 0 2 63 Y 1 0.318 0 +Contig8_chr26_27834126_27834326 140 G A 41.7 chr26 27834268 G 13 0 2 66 7 0 2 48 13 0 2 66 11 0 2 60 12 0 2 63 6 0 2 45 N 29 0.142 1 +Contig78_chr26_31128839_31129005 123 T C 145.0 chr26 -1 N 11 0 2 60 3 0 2 36 7 0 2 48 8 0 2 51 10 0 2 46 7 0 2 48 N -1 1.230 1 +Contig28_chr26_32935355_32935833 289 T C 77.9 chr26 32935638 T 15 0 2 72 22 0 2 93 15 0 2 72 9 0 2 54 15 0 2 72 17 0 2 78 Y 10 2.258 1 +Contig36_chr26_36606876_36607240 115 A T 139.0 chr26 36606979 A 1 0 2 30 7 0 2 48 14 0 2 69 13 0 2 66 9 0 2 54 3 0 2 36 Y 8 0.071 0 +Contig135_chr27_6853874_6854079 158 C T 116.0 chr27 6854032 T 18 0 2 81 19 0 2 84 13 0 2 66 7 0 2 48 8 0 2 51 11 0 2 60 N 4 0.060 1 +Contig47_chr27_11777710_11777915 25 A G 67.3 chr27 11777731 A 3 0 2 36 5 0 2 42 6 0 2 45 10 0 2 57 9 0 2 54 6 0 2 45 N 97 +99. 0 +Contig23_chr27_14633002_14633153 23 G A 128.0 chr27 14633023 A 3 0 2 36 4 0 2 39 5 0 2 42 5 0 2 42 3 0 2 36 2 0 2 33 N 240 3.881 0 +Contig31_chr27_14987233_14988055 630 A G 48.5 chr27 14987850 G 10 0 2 57 2 0 2 33 4 0 2 39 4 0 2 39 1 0 2 30 4 0 2 39 Y 9 0.089 1 +Contig29_chr27_15428166_15429413 380 T C 140.0 chr27 15428539 T 15 0 2 72 15 0 2 72 17 0 2 78 15 0 2 72 15 0 2 72 15 0 2 72 Y 47 0.916 1 +Contig31_chr27_19519489_19520891 129 G T 14.9 chr27 19519624 T 12 0 2 63 19 0 2 84 20 0 2 87 16 0 2 75 10 0 2 57 11 0 2 60 Y 48 2.756 0 +Contig64_chr27_34654435_34654621 132 C A 115.0 chr27 34654567 T 2 0 2 33 2 0 2 33 5 0 2 42 3 0 2 36 3 0 2 36 8 0 2 51 N 12 0.297 1 +Contig35_chr27_40596169_40596445 20 G C 133.0 chr27 40596189 G 8 0 2 51 3 0 2 36 4 0 2 39 2 0 2 33 4 0 2 39 4 0 2 39 Y 4 +99. 
1 +Contig85_chr27_45471750_45472022 211 G A 53.1 chr27 45471964 G 18 0 2 81 10 0 2 57 15 0 2 72 0 13 0 36 16 0 2 75 14 0 2 69 N 75 2.502 1 +Contig131_chr28_6481806_6483783 138 C T 36.2 chr28 6481953 C 12 0 2 63 12 0 2 63 20 0 2 87 11 0 2 60 10 0 2 57 12 0 2 63 Y 10 0.387 0 +Contig141_chr28_10027332_10028242 780 T G 74.8 chr28 10028095 T 10 0 2 57 11 0 2 60 14 0 2 69 10 0 2 57 7 0 2 48 9 0 2 54 Y 19 3.348 0 +Contig144_chr28_15468203_15470548 743 G A 20.0 chr28 15468942 G 13 0 2 66 12 0 2 63 10 0 2 57 11 0 2 60 16 0 2 75 7 0 2 48 N 14 0.053 0 +Contig47_chr28_21311718_21312366 541 G A 116.0 chr28 21312258 G 9 0 2 54 6 0 2 45 12 0 2 63 6 0 2 45 5 0 2 45 12 0 2 63 N 9 0.240 0 +Contig60_chr28_30197166_30197364 92 T C 164.0 chr28 30197258 T 10 0 2 57 13 0 2 66 15 0 2 72 16 0 2 75 12 0 2 63 11 0 2 60 N 369 1.139 0 +Contig201_chr28_36339953_36341322 260 C T 6.36 chr28 36340213 T 4 0 2 39 0 0 -1 0 2 0 2 33 2 0 2 33 3 0 2 36 4 0 2 39 N 4 0.183 0 +Contig175_chr28_36441165_36441915 68 T C 3.83 chr28 36441234 T 4 4 1 15 6 0 2 45 12 0 2 63 15 0 2 72 6 0 2 45 9 0 2 54 N 4 1.610 2 +Contig29_chr29_4726399_4727143 559 A T 163.0 chr29 4726955 A 15 0 2 72 18 0 2 81 18 0 2 81 16 0 2 75 11 0 2 60 14 0 2 72 Y 161 3.114 0 +Contig48_chr29_13129286_13130137 232 A G 92.2 chr29 13129514 G 13 0 2 66 11 0 2 60 19 0 2 84 16 0 2 75 11 0 2 60 17 0 2 78 Y 337 2.581 1 +Contig64_chr29_15736891_15737257 344 T C 40.4 chr29 15737233 C 1 0 2 30 0 0 -1 0 0 0 -1 0 2 0 2 33 0 0 -1 0 0 0 -1 0 N 58 +99. 
0 +Contig33_chr29_17000374_17000921 71 C T 48.6 chr29 17000441 - 4 0 2 39 9 0 2 54 12 0 2 66 10 0 2 57 7 0 2 48 4 0 2 39 N 26 5.491 0 +Contig34_chr29_17581796_17584016 2105 C T 126.0 chr29 17583890 T 14 0 2 69 11 0 2 60 18 0 2 81 12 0 2 63 10 0 2 57 10 0 2 57 Y 22 2.208 0 +Contig19_chr29_20976080_20977761 1007 G A 115.0 chr29 20977076 G 19 0 2 84 22 0 2 93 22 0 2 93 22 0 2 93 11 0 2 60 13 0 2 66 Y 4 1.915 0 +Contig51_chr29_21149853_21150467 266 C T 146.0 chr29 21150118 C 12 0 2 63 12 0 2 63 23 0 2 96 14 0 2 69 13 0 2 66 10 0 2 57 Y 4 0.051 0 +Contig1_chr30_5992217_5993068 106 C T 129.0 chr30 5992319 C 10 0 2 57 11 0 2 60 7 0 2 48 11 0 2 60 10 0 2 57 12 0 2 63 Y 76 1.079 0 +Contig1_chr30_8232878_8233406 402 C T 127.0 chr30 8233264 C 8 0 2 51 19 0 2 84 16 0 2 75 18 0 2 81 10 0 2 57 14 0 2 69 Y 358 5.283 0 +Contig108_chr30_9436961_9437520 546 C T 39.8 chr30 9437502 C 7 0 2 48 5 0 2 42 2 0 2 33 7 0 2 48 5 0 2 42 7 0 2 48 Y 64 +99. 0 +Contig165_chr30_25804389_25804926 190 T C 126.0 chr30 25804592 C 3 0 2 36 8 0 2 51 7 0 2 48 10 0 2 57 7 0 2 48 4 0 2 39 Y 113 0.329 0 +Contig193_chr30_27495616_27496125 434 C A 234.0 chr30 27496024 C 13 0 2 66 16 0 2 75 25 0 2 102 16 0 2 75 13 0 2 66 14 0 2 69 Y 76 2.621 0 +Contig114_chr30_33636712_33637208 34 C T 142.0 chr30 33636744 C 7 0 2 48 4 1 2 20 6 0 2 45 6 0 2 45 3 4 1 29 5 0 2 42 Y 14 8.028 0 +Contig38_chr31_5164423_5166573 2074 C T 134.0 chr31 5166501 T 13 0 2 66 10 0 2 57 17 0 2 78 11 0 2 60 17 0 2 78 10 0 2 57 Y 58 +99. 
0 +Contig6_chr31_9649308_9650149 431 G T 162.0 chr31 9649742 G 31 0 2 120 23 0 2 96 17 0 2 78 17 0 2 78 10 0 2 57 16 0 2 75 Y 98 2.200 0 +Contig85_chr31_12242872_12245082 38 G C 92.4 chr31 12242910 G 1 0 2 30 6 0 2 45 9 0 2 54 8 0 2 51 5 0 2 42 9 0 2 54 N 2 2.340 0 +Contig7_chr31_12384974_12386400 305 C T 69.6 chr31 12385267 C 6 0 2 45 10 0 2 57 11 0 2 60 11 0 2 60 9 0 2 54 12 0 2 63 Y 44 1.165 0 +Contig90_chr31_17267583_17267778 81 C A 143.0 chr31 17267665 C 20 0 2 87 6 0 2 45 14 0 2 72 22 0 2 93 17 0 2 78 15 0 2 72 N 7 0.565 0 +Contig68_chr31_20000241_20000597 215 C T 131.0 chr31 20000454 T 0 0 -1 0 0 0 -1 0 0 0 -1 0 0 0 -1 0 0 0 -1 0 0 0 -1 0 Y 5 3.383 1 +Contig137_chr31_23357653_23358568 885 G A 119.0 chr31 23358545 G 5 0 2 42 3 0 2 36 3 0 2 36 2 0 2 33 3 0 2 36 4 0 2 39 Y 11 +99. 0 +Contig17_chr31_26433828_26434459 498 T C 9.79 chr31 26434322 T 18 0 2 81 10 0 2 57 15 0 2 72 13 0 2 66 16 0 2 75 15 0 2 72 Y 137 4.814 0 +Contig9_chr32_19479532_19479735 12 A G 20.7 chr32 19479544 A 1 0 2 30 2 0 2 33 1 0 2 30 5 0 2 42 3 0 2 36 3 0 2 36 N 17 +99. 0 +Contig30_chr32_25902721_25905783 208 C G 162.0 chr32 25902927 G 11 0 2 60 13 0 2 66 11 0 2 60 12 0 2 63 7 0 2 48 11 0 2 60 Y 145 0.322 2 +Contig7_chr32_27789513_27789926 20 G A 7.19 chr32 27789530 A 0 0 -1 0 4 0 2 39 4 0 2 39 4 0 2 39 2 0 2 33 6 0 2 45 Y 14 +99. 
0 +Contig42_chr32_38900713_38901320 320 A G 134.0 chr32 38901021 T 12 0 2 63 10 0 2 57 9 11 1 104 5 0 2 42 19 0 2 84 7 6 1 56 Y 71 0.165 0 +Contig18_chr33_22207246_22209159 1363 G T 51.5 chr33 22208619 - 16 0 2 75 8 0 2 51 11 0 2 60 10 0 2 57 15 0 2 72 12 0 2 63 Y 59 2.560 0 +Contig104_chr33_22483642_22484187 424 C T 140.0 chr33 22484054 T 13 0 2 66 16 0 2 75 9 0 2 54 15 0 2 72 13 0 2 66 10 0 2 57 Y 36 0.404 0 +Contig170_chr33_26189421_26189940 292 T C 98.4 chr33 26189703 T 21 0 2 90 13 0 2 66 15 0 2 72 13 0 2 66 19 0 2 84 13 0 2 66 Y 23 0.307 0 +Contig113_chr34_13341080_13341643 236 C T 90.7 chr34 13341316 C 4 0 2 39 2 0 2 33 8 0 2 51 4 0 2 39 8 0 2 51 3 0 2 36 Y 47 0.412 3 +Contig405_chr34_14415672_14415979 59 A G 36.2 chr34 14415731 G 8 0 2 51 2 0 2 33 8 0 2 51 6 0 2 48 3 0 2 36 7 0 2 48 Y 45 0.405 1 +Contig21_chr34_16422980_16425681 2009 G A 19.4 chr34 16424960 G 0 0 -1 0 0 0 -1 0 0 0 -1 0 5 0 2 42 0 0 -1 0 0 0 -1 0 Y 28 0.196 0 +Contig41_chr34_16544482_16545449 46 T C 102.0 chr34 16544523 T 5 0 2 42 11 0 2 60 6 0 2 45 0 2 0 3 7 0 2 48 8 0 2 51 Y 215 1.156 0 +Contig8_chr34_18474513_18475673 1122 C A 129.0 chr34 18475628 A 8 0 2 51 15 0 2 72 13 0 2 66 17 0 2 78 13 0 2 66 6 0 2 45 Y 61 0.123 2 +Contig152_chr34_31794848_31795540 242 G A 93.2 chr34 31795093 G 11 0 2 60 24 0 2 99 17 0 2 78 15 0 2 72 18 0 2 81 17 0 2 78 Y 123 2.780 0 +Contig28_chr34_41708848_41712034 1381 A G 78.2 chr34 41710232 A 11 0 2 60 17 0 2 78 15 0 2 72 16 0 2 75 15 0 2 72 14 0 2 69 Y 236 0.234 0 +Contig85_chr34_42798284_42800584 1845 C T 171.0 chr34 42800126 T 5 0 2 42 7 0 2 48 6 0 2 45 7 0 2 48 6 0 2 45 2 0 2 33 Y 5 2.787 0 +Contig47_chr35_3666773_3667898 348 G T 124.0 chr35 3667121 G 9 0 2 54 20 0 2 87 18 0 2 81 15 0 2 72 12 0 2 63 14 0 2 69 Y 285 0.235 0 +Contig195_chr35_15722500_15722741 205 G A 4.08 chr35 15722718 G 3 0 2 36 5 0 2 42 1 0 2 30 6 0 2 45 1 0 2 30 1 0 2 30 N 43 +99. 
0 +Contig101_chr35_19513178_19513697 62 C T 112.0 chr35 19513238 C 12 0 2 63 7 0 2 48 13 0 2 66 7 0 2 48 5 0 2 42 8 0 2 51 N 115 3.135 0 +Contig19_chr35_23887144_23888282 90 C A 10.1 chr35 23887242 - 3 3 1 12 4 4 1 19 8 6 1 37 4 3 1 11 8 3 2 7 9 3 2 11 Y 105 0.199 0 +Contig47_chr35_24382042_24382526 33 G A 87.0 chr35 24382076 G 5 0 2 42 4 0 2 39 6 0 2 45 7 0 2 48 4 0 2 39 2 0 2 33 Y 71 +99. 0 +Contig77_chr35_24796947_24797172 65 A G 52.1 chr35 24797009 A 7 0 2 48 5 0 2 42 8 0 2 51 6 0 2 45 12 0 2 63 10 0 2 57 N 11 1.401 3 +Contig74_chr35_25394343_25394813 303 A T 221.0 chr35 25394646 G 23 0 2 96 15 0 2 72 25 0 2 105 7 7 1 49 18 0 2 81 16 0 2 75 Y 58 4.298 0 +Contig5_chr36_4562983_4563634 343 C T 151.0 chr36 4563324 T 20 0 2 87 20 0 2 87 23 0 2 96 24 0 2 99 9 0 2 54 8 0 2 51 Y 40 1.169 0 +Contig75_chr36_7885319_7885588 53 G A 25.7 chr36 7885372 G 10 0 2 57 8 0 2 51 13 0 2 66 7 0 2 48 4 0 2 39 7 0 2 48 N 7 2.653 0 +Contig184_chr36_18956191_18958552 187 A G 11.5 chr36 18956371 G 10 0 2 57 11 0 2 60 21 0 2 90 14 0 2 69 7 0 2 48 4 0 2 39 N 278 1.434 2 +Contig12_chr36_21557176_21557828 513 T A 159.0 chr36 21557695 A 11 0 2 60 14 0 2 69 21 0 2 90 12 0 2 63 15 0 2 72 11 0 2 60 Y 55 0.222 0 +Contig2_chr36_22436067_22436794 653 C T 73.0 chr36 22436730 C 11 0 2 60 16 0 2 75 13 0 2 66 11 0 2 60 21 0 2 90 21 0 2 90 Y 9 0.534 0 +Contig133_chr36_32954045_32955409 136 A G 116.0 chr36 32954182 A 16 0 2 75 15 0 2 72 20 0 2 87 11 0 2 60 18 0 2 81 13 0 2 66 Y 74 3.772 1 +Contig53_chr37_6665763_6665919 116 C T 111.0 chr37 6665875 C 9 0 2 54 9 0 2 54 5 0 2 42 9 0 2 54 8 0 2 51 10 0 2 57 N 15 10.875 1 +Contig42_chr37_9589176_9591269 252 G A 25.1 chr37 9589430 G 10 0 2 40 13 0 2 66 18 0 2 81 21 0 2 90 9 0 2 54 17 0 2 78 N 67 1.170 2 +Contig2_chr37_17134963_17136513 1140 A C 158.0 chr37 17136092 A 14 0 2 69 24 0 2 99 17 0 2 78 16 0 2 75 15 0 2 75 13 0 2 66 Y 12 0.053 1 +Contig18_chr37_17147806_17149851 291 T G 112.0 chr37 17148084 T 4 6 1 45 16 0 2 75 17 0 2 78 14 0 2 69 22 0 2 93 13 0 2 
66 Y 41 4.442 0 +Contig64_chr37_17606895_17607534 565 C T 30.2 chr37 17607439 A 9 0 2 54 16 0 2 75 20 0 2 87 14 0 2 69 16 0 2 75 10 0 2 57 N 20 1.622 0 +Contig126_chr37_21587881_21590621 373 G T 132.0 chr37 21588256 G 11 0 2 60 11 0 2 60 23 0 2 96 12 0 2 63 8 0 2 51 18 0 2 81 Y 12 0.549 0 +Contig2_chr37_31197993_31198256 182 C T 39.6 chr37 31198171 T 6 0 2 45 10 0 2 57 7 0 2 48 9 0 2 54 10 0 2 57 12 0 2 63 N 2 0.595 0 +Contig46_chr37_31852376_31853555 825 A G 111.0 chr37 31853191 G 19 0 2 84 14 0 2 69 15 0 2 72 7 0 2 48 8 0 2 51 16 0 2 75 Y 17 0.128 1 +Contig7_chr38_12217200_12218387 1163 A T 44.4 chr38 12218353 A 11 0 2 60 13 0 2 66 17 0 2 78 10 0 2 57 11 0 2 60 11 0 2 60 Y 67 +99. 0 +Contig15_chr38_12282020_12282253 150 C T 156.0 chr38 12282164 A 17 0 2 78 11 0 2 60 19 0 2 84 14 0 2 69 5 0 2 42 14 0 2 69 Y 26 2.952 1 +Contig4_chr38_14807432_14807747 275 A G 36.5 chr38 14807715 G 1 0 2 30 2 0 2 33 2 0 2 33 4 0 2 39 1 0 2 30 0 0 -1 0 Y 28 +99. 1 +Contig6_chr38_16185744_16186110 325 A G 74.9 chr38 16186061 A 5 0 2 42 3 0 2 36 9 0 2 54 7 0 2 48 1 0 2 30 12 0 2 63 Y 40 +99. 0 +Contig265_chrX_2689247_2689484 114 C G 103.0 chrX 2689356 C 11 0 2 60 9 0 2 54 13 0 2 66 16 0 2 75 14 0 2 69 10 0 2 57 N 2 9.232 1 +Contig122_chrX_6026976_6027327 330 C T 79.4 chrX 6027303 C 3 0 2 36 3 0 2 36 3 0 2 36 4 0 2 39 3 0 2 36 6 0 2 45 Y 30 +99. 0 +Contig15_chrX_15659909_15660340 15 A C 14.9 chrX 15659924 C 1 0 2 30 1 0 2 30 3 0 2 36 6 0 2 45 2 0 2 33 0 0 -1 0 Y 216 +99. 1 +Contig12_chrX_23243561_23244412 479 C G 67.7 chrX 23244037 C 2 0 2 33 4 2 2 8 2 6 1 43 7 0 2 48 6 0 2 45 4 0 2 39 Y 208 1.620 0 +Contig113_chrX_26287829_26288398 385 C T 59.6 chrX 26288213 C 9 0 2 54 9 0 2 54 17 0 2 78 11 0 2 60 3 8 1 44 4 0 2 39 N 13 0.077 0 +Contig186_chrX_29118735_29118939 192 G A 7.01 chrX 29118931 G 1 0 2 30 7 0 2 48 4 0 2 39 5 0 2 42 8 0 2 51 4 0 2 39 N 50 +99. 
0 +Contig237_chrX_31256648_31257654 165 T A 246.0 chrX 31256814 T 7 0 2 48 23 0 2 96 19 0 2 84 17 0 2 78 14 0 2 69 8 0 2 51 Y 37 1.481 0 +Contig25_chrX_40729418_40730089 332 C T 31.2 chrX 40729745 C 0 0 -1 0 2 0 2 33 4 0 2 39 5 0 2 42 3 0 2 36 3 0 2 36 Y 34 0.212 0 +Contig90_chrX_57430715_57431566 548 C T 116.0 chrX 57431266 T 9 0 2 54 18 0 2 81 13 0 2 66 14 0 2 69 8 0 2 54 7 0 2 48 Y 261 0.154 1 +Contig133_chrX_84833782_84834125 182 G A 69.7 chrX 84833962 G 5 0 2 42 18 0 2 81 12 0 2 63 19 0 2 84 6 3 1 27 7 0 2 48 N 619 0.278 0 +Contig129_chrX_90586053_90586467 135 A T 120.0 chrX 90586195 A 1 0 2 30 6 0 2 45 8 0 2 51 5 0 2 42 1 0 2 30 2 0 2 33 N 637 0.245 0 +Contig125_chrX_93319363_93320877 349 A C 145.0 chrX 93319721 A 4 0 2 39 6 0 2 45 11 0 2 60 10 0 2 57 13 0 2 66 6 0 2 45 Y 59 1.686 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/add_fst_column/add_fst_column.wsf Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,21 @@ +Contig113_chr5_11052263_11052603 28 C T 38.2 chr5 11052280 C 1 2 1 12 3 2 1 10 5 0 2 42 2 1 2 13 3 0 2 36 8 0 2 51 Y 161 +99. 0 0.1636 +Contig215_chr5_70946445_70947428 363 T G 28.2 chr5 70946809 C 4 0 2 39 0 5 0 12 9 0 2 54 6 0 2 45 3 3 2 1 9 0 2 54 N 43 0.153 0 0.3846 +Contig132_chr7_20426224_20428145 1815 A G 28.3 chr7 20428041 A 11 1 2 43 12 0 2 63 19 0 2 84 23 0 2 96 14 0 2 69 10 0 2 57 N 11 0.264 0 0.0213 +Contig30_chr8_17147743_17147923 13 G A 105.0 chr8 17147756 A 1 3 1 19 1 0 2 30 3 0 2 36 1 0 2 30 1 0 2 30 3 0 2 36 N 6 +99. 0 0.4286 +Contig44_chr8_71186368_71188207 1455 G T 147.0 chr8 71187818 G 4 10 1 74 3 0 2 36 20 0 2 87 12 0 2 63 8 0 2 51 10 0 2 57 Y 88 0.036 0 0.4167 +Contig103_chr11_8844784_8845095 214 T G 135.0 chr11 8844993 T 1 1 2 12 10 0 2 57 5 4 1 26 2 3 1 13 2 7 1 34 1 1 2 13 Y 75 0.731 0 0.2101 +Contig37_chr13_15910164_15910426 245 G A 32.9 chr13 -1 N 3 4 1 41 4 0 2 39 3 0 2 36 4 0 2 39 3 0 2 36 10 0 2 57 N -1 2.159 1 0.2222 +Contig50_chr17_12247973_12249183 889 G T 47.6 chr17 12248878 G 0 1 2 9 8 0 2 51 9 2 2 21 7 2 2 21 15 0 2 72 0 3 0 9 Y 1 1.181 0 0.0150 +Contig159_chr22_7896450_7896974 109 G C 151.0 chr22 7896570 G 16 0 2 75 5 7 1 62 14 0 2 69 16 0 2 75 13 0 2 66 13 0 2 66 Y 16 0.465 0 0.1429 +Contig77_chr22_49764414_49764875 353 C A 148.0 chr22 49764777 C 7 4 1 65 18 0 2 81 16 0 2 75 20 0 2 87 4 3 1 52 9 4 1 67 Y 12 0.941 0 0.0741 +Contig61_chr24_30465488_30465834 149 G T 68.2 chr24 30465637 G 13 0 2 66 4 2 2 11 18 0 2 81 11 0 2 60 11 0 2 60 9 0 2 54 N 99 0.105 2 0.0556 +Contig59_chr25_18196776_18197707 785 G A 112.0 chr25 18197551 G 8 10 1 42 27 0 2 108 21 0 2 90 18 0 2 81 10 0 2 57 14 0 2 69 N 36 3.625 0 0.1250 +Contig85_chr27_45471750_45472022 211 G A 53.1 chr27 45471964 G 18 0 2 81 10 0 2 57 15 0 2 72 0 13 0 36 16 0 2 75 14 0 2 69 N 75 2.502 1 0.3023 
+Contig175_chr28_36441165_36441915 68 T C 3.83 chr28 36441234 T 4 4 1 15 6 0 2 45 12 0 2 63 15 0 2 72 6 0 2 45 9 0 2 54 N 4 1.610 2 0.1667 +Contig114_chr30_33636712_33637208 34 C T 142.0 chr30 33636744 C 7 0 2 48 4 1 2 20 6 0 2 45 6 0 2 45 3 4 1 29 5 0 2 42 Y 14 8.028 0 0.0435 +Contig42_chr32_38900713_38901320 320 A G 134.0 chr32 38901021 T 12 0 2 63 10 0 2 57 9 11 1 104 5 0 2 42 19 0 2 84 7 6 1 56 Y 71 0.165 0 0.2821 +Contig41_chr34_16544482_16545449 46 T C 102.0 chr34 16544523 T 5 0 2 42 11 0 2 60 6 0 2 45 0 2 0 3 7 0 2 48 8 0 2 51 Y 215 1.156 0 0.1429 +Contig19_chr35_23887144_23888282 90 C A 10.1 chr35 23887242 - 3 3 1 12 4 4 1 19 8 6 1 37 4 3 1 11 8 3 2 7 9 3 2 11 Y 105 0.199 0 0.0051 +Contig74_chr35_25394343_25394813 303 A T 221.0 chr35 25394646 G 23 0 2 96 15 0 2 72 25 0 2 105 7 7 1 49 18 0 2 81 16 0 2 75 Y 58 4.298 0 0.0986 +Contig18_chr37_17147806_17149851 291 T G 112.0 chr37 17148084 T 4 6 1 45 16 0 2 75 17 0 2 78 14 0 2 69 22 0 2 93 13 0 2 66 Y 41 4.442 0 0.1304 +Contig12_chrX_23243561_23244412 479 C G 67.7 chrX 23244037 C 2 0 2 33 4 2 2 8 2 6 1 43 7 0 2 48 6 0 2 45 4 0 2 39 Y 208 1.620 0 0.0256
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/average_fst/average_fst.txt Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,1 @@ +average Fst is 0.16461, using 21 SNPs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/coverage_distributions/coverage.html Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,39 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> + <head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8" /> + <link rel="stylesheet" href="/static/style/base.css" type="text/css" /> + <title>Coverage distributions Galaxy Composite Dataset</title> + </head> + <body> + <div class="document"> + Output completed: 2012-04-03 01:57:24 PM + <p/> + <div id="gd_outputs"> + Outputs + <ul> + <li><a href="coverage.pdf">coverage.pdf</a></li> + <li><a href="coverage.txt">coverage.txt</a></li> + </ul> + </div> + <div id="gd_inputs"> + Inputs + <ul> + <li>Data source: sequence coverage</li> + </ul> + </div> + <div id="gd_misc"> + Individuals +<ol> +<li>PB1</li> +<li>PB2</li> +<li>PB3</li> +<li>PB4</li> +<li>PB6</li> +<li>PB8</li> +</ol> + </div> + </div> + </body> +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/coverage_distributions/coverage.pdf Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,363 @@ +%PDF-1.4 +%âãÏÓ\r +1 0 obj +<< +/CreationDate (D:20120403135724) +/ModDate (D:20120403135724) +/Title (R Graphics Output) +/Producer (R 2.11.0) +/Creator (R) +>> +endobj +2 0 obj +<< +/Type /Catalog +/Pages 3 0 R +>> +endobj +5 0 obj +<< +/Type /Page +/Parent 3 0 R +/Contents 6 0 R +/Resources 4 0 R +>> +endobj +6 0 obj +<< +/Length 7 0 R +>> +stream +1 J 1 j q +Q q 59.04 73.44 630.72 299.52 re W n +1.000 0.000 0.000 RG +2.25 w +[] 0 d +1 J +1 j +10.00 M +82.40 174.26 m +106.73 206.89 l +131.07 206.89 l +155.40 263.98 l +179.73 263.98 l +204.07 223.20 l +228.40 312.93 l +252.73 304.77 l +277.07 255.83 l +301.40 280.30 l +325.73 312.93 l +350.07 321.08 l +374.40 255.83 l +398.73 263.98 l +423.07 231.36 l +447.40 231.36 l +471.73 174.26 l +496.07 215.04 l +520.40 174.26 l +544.73 133.47 l +569.07 157.95 l +593.40 109.00 l +617.73 109.00 l +642.07 92.69 l +666.40 84.53 l +S +Q q +0.000 0.000 0.000 RG +0.75 w +[] 0 d +1 J +1 j +10.00 M +82.40 73.44 m 569.07 73.44 l S +82.40 73.44 m 82.40 66.24 l S +204.07 73.44 m 204.07 66.24 l S +325.73 73.44 m 325.73 66.24 l S +447.40 73.44 m 447.40 66.24 l S +569.07 73.44 m 569.07 66.24 l S +BT +0.000 0.000 0.000 rg +/F2 1 Tf 12.00 0.00 -0.00 12.00 79.06 47.52 Tm (0) Tj +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 200.73 47.52 Tm (5) Tj +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 319.06 47.52 Tm (10) Tj +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 440.73 47.52 Tm (15) Tj +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 562.39 47.52 Tm (20) Tj +ET +59.04 84.53 m 59.04 345.55 l S +59.04 84.53 m 51.84 84.53 l S +59.04 149.79 m 51.84 149.79 l S +59.04 215.04 m 51.84 215.04 l S +59.04 280.30 m 51.84 280.30 l S +59.04 345.55 m 51.84 345.55 l S +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 72.86 Tm (0.00) Tj +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 138.11 Tm (0.02) Tj +ET +BT +/F2 1 Tf 0.00 12.00 
-12.00 0.00 41.76 203.37 Tm (0.04) Tj +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 268.62 Tm (0.06) Tj +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 333.88 Tm (0.08) Tj +ET +59.04 73.44 m +689.76 73.44 l +689.76 372.96 l +59.04 372.96 l +59.04 73.44 l +S +Q q +BT +0.000 0.000 0.000 rg +/F2 1 Tf 12.00 0.00 -0.00 12.00 348.69 18.72 Tm [(Co) 15 (v) 25 (er) 10 (age)] TJ +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 12.96 195.28 Tm [(Propor) -40 (tion)] TJ +ET +Q q 59.04 73.44 630.72 299.52 re W n +1.000 1.000 0.000 RG +2.25 w +[] 0 d +1 J +1 j +10.00 M +82.40 157.95 m +106.73 166.10 l +131.07 231.36 l +155.40 215.04 l +179.73 280.30 l +204.07 263.98 l +228.40 272.14 l +252.73 231.36 l +277.07 345.55 l +301.40 321.08 l +325.73 288.45 l +350.07 329.24 l +374.40 255.83 l +398.73 280.30 l +423.07 247.67 l +447.40 239.51 l +471.73 215.04 l +496.07 157.95 l +520.40 174.26 l +544.73 166.10 l +569.07 133.47 l +593.40 92.69 l +617.73 100.85 l +642.07 100.85 l +666.40 100.85 l +S +0.000 1.000 0.000 RG +82.40 141.63 m +106.73 166.10 l +131.07 182.42 l +155.40 182.42 l +179.73 231.36 l +204.07 198.73 l +228.40 206.89 l +252.73 263.98 l +277.07 263.98 l +301.40 263.98 l +325.73 239.51 l +350.07 280.30 l +374.40 198.73 l +398.73 304.77 l +423.07 231.36 l +447.40 247.67 l +471.73 239.51 l +496.07 239.51 l +520.40 215.04 l +544.73 198.73 l +569.07 231.36 l +593.40 149.79 l +617.73 166.10 l +642.07 166.10 l +666.40 100.85 l +S +0.000 1.000 1.000 RG +82.40 133.47 m +106.73 133.47 l +131.07 255.83 l +155.40 231.36 l +179.73 272.14 l +204.07 272.14 l +228.40 337.40 l +252.73 280.30 l +277.07 280.30 l +301.40 280.30 l +325.73 337.40 l +350.07 288.45 l +374.40 296.61 l +398.73 223.20 l +423.07 272.14 l +447.40 255.83 l +471.73 239.51 l +496.07 190.57 l +520.40 117.16 l +544.73 125.32 l +569.07 149.79 l +593.40 109.00 l +617.73 109.00 l +642.07 92.69 l +666.40 92.69 l +S +0.000 0.000 1.000 RG +82.40 157.95 m +106.73 190.57 l +131.07 215.04 l +155.40 288.45 l +179.73 231.36 l +204.07 272.14 l 
+228.40 272.14 l +252.73 280.30 l +277.07 296.61 l +301.40 361.87 l +325.73 329.24 l +350.07 329.24 l +374.40 296.61 l +398.73 272.14 l +423.07 215.04 l +447.40 239.51 l +471.73 190.57 l +496.07 157.95 l +520.40 166.10 l +544.73 125.32 l +569.07 100.85 l +593.40 92.69 l +617.73 109.00 l +642.07 84.53 l +666.40 92.69 l +S +1.000 0.000 1.000 RG +82.40 198.73 m +106.73 157.95 l +131.07 215.04 l +155.40 215.04 l +179.73 304.77 l +204.07 223.20 l +228.40 321.08 l +252.73 361.87 l +277.07 280.30 l +301.40 280.30 l +325.73 329.24 l +350.07 280.30 l +374.40 337.40 l +398.73 231.36 l +423.07 272.14 l +447.40 223.20 l +471.73 174.26 l +496.07 198.73 l +520.40 149.79 l +544.73 117.16 l +569.07 100.85 l +593.40 109.00 l +617.73 100.85 l +642.07 84.53 l +666.40 100.85 l +S +1.000 0.000 0.000 rg +0.000 0.000 0.000 RG +0.75 w +[] 0 d +642.24 362.16 8.64 -7.20 re B +1.000 1.000 0.000 rg +642.24 347.76 8.64 -7.20 re B +0.000 1.000 0.000 rg +642.24 333.36 8.64 -7.20 re B +0.000 1.000 1.000 rg +642.24 318.96 8.64 -7.20 re B +0.000 0.000 1.000 rg +642.24 304.56 8.64 -7.20 re B +1.000 0.000 1.000 rg +642.24 290.16 8.64 -7.20 re B +BT +0.000 0.000 0.000 rg +/F2 1 Tf 12.00 0.00 -0.00 12.00 661.68 354.25 Tm (PB1) Tj +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 661.68 339.85 Tm (PB2) Tj +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 661.68 325.45 Tm (PB3) Tj +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 661.68 311.05 Tm (PB4) Tj +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 661.68 296.65 Tm (PB6) Tj +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 661.68 282.25 Tm (PB8) Tj +ET +Q +endstream +endobj +7 0 obj +4763 +endobj +3 0 obj +<< +/Type /Pages +/Kids [ +5 0 R +] +/Count 1 +/MediaBox [0 0 720 432] +>> +endobj +4 0 obj +<< +/ProcSet [/PDF /Text] +/Font <</F2 9 0 R >> +/ExtGState << >> +>> +endobj +8 0 obj +<< +/Type /Encoding +/BaseEncoding /WinAnsiEncoding +/Differences [ 45/minus 96/quoteleft +144/dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent +/dieresis /.notdef /ring /cedilla /.notdef 
/hungarumlaut /ogonek /caron /space] +>> +endobj +9 0 obj << +/Type /Font +/Subtype /Type1 +/Name /F2 +/BaseFont /Helvetica +/Encoding 8 0 R +>> endobj +xref +0 10 +0000000000 65535 f +0000000021 00000 n +0000000164 00000 n +0000005129 00000 n +0000005212 00000 n +0000000213 00000 n +0000000293 00000 n +0000005109 00000 n +0000005293 00000 n +0000005550 00000 n +trailer +<< +/Size 10 +/Info 1 0 R +/Root 2 0 R +>> +startxref +5646 +%%EOF
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/coverage_distributions/coverage.txt Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,18 @@ + + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 + PB1 2 6 10 15 21 25 32 39 44 50 57 64 70 75 80 84 87 91 94 95 + PB2 2 4 9 13 19 24 30 35 43 50 56 64 69 75 80 85 89 91 94 96 + PB3 1 4 7 10 14 18 22 27 33 38 43 49 52 59 64 69 73 78 82 86 + PB4 1 3 8 12 18 24 32 38 44 50 57 64 70 74 80 85 90 93 94 96 + PB6 2 5 9 15 20 26 31 37 44 52 60 67 74 80 84 88 92 94 96 98 + PB8 3 5 9 13 20 24 32 40 46 52 60 66 73 78 84 88 91 94 96 97 + + + 20 21 22 23 24 + PB1 97 98 99 99 99 + PB2 98 98 98 99 99 + PB3 90 92 95 97 98 + PB4 98 98 99 99 99 + PB6 98 98 99 99 99 + PB8 98 98 99 99 99 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/dpmix/dpmix.html Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,56 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> + <head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8" /> + <link rel="stylesheet" href="/static/style/base.css" type="text/css" /> + <title>dpmix Galaxy Composite Dataset</title> + </head> + <body> + <div class="document"> + Output completed: 2012-04-03 02:22:23 PM + <p/> + <div id="gd_outputs"> + Outputs + <ul> + <li><a href="dpmix.pdf">dpmix.pdf</a></li> + <li><a href="misc.txt">misc.txt</a></li> + </ul> + </div> + <div id="gd_inputs"> + Inputs + <ul> + <li>Data source: sequence coverage</li> + <li>Switch penalty: 10</li> + <li>Also analyze random chromosome: no</li> + </ul> + </div> + <div id="gd_misc"> + Populations +<ul> +<li> +Ancestral population 1 +<ol> +<li>PB1</li> +<li>PB2</li> +</ol> +</li> +<li> +Ancestral population 2 +<ol> +<li>PB3</li> +<li>PB4</li> +</ol> +</li> +<li> +Potentially admixed +<ol> +<li>PB6</li> +<li>PB8</li> +</ol> +</li> +</ul> + </div> + </div> + </body> +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/dpmix/dpmix.tabular Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,78 @@ +chr1 0 125154818 0 PB6 +chr1 0 125154818 0 PB8 +chr2 0 85243509 0 PB6 +chr2 0 85243509 0 PB8 +chr3 0 92410450 0 PB6 +chr3 0 92410450 0 PB8 +chr4 0 75619257 0 PB6 +chr4 0 75619257 0 PB8 +chr5 0 90203461 0 PB6 +chr5 0 90203461 0 PB8 +chr6 0 74848993 0 PB6 +chr6 0 74848993 0 PB8 +chr7 0 55833450 0 PB6 +chr7 0 55833450 0 PB8 +chr8 0 71187818 0 PB6 +chr8 0 71187818 0 PB8 +chr9 0 39008708 0 PB6 +chr9 0 39008708 0 PB8 +chr10 0 59511126 0 PB6 +chr10 0 59511126 0 PB8 +chr11 0 53408638 0 PB6 +chr11 0 53408638 2 PB8 +chr12 0 71364712 0 PB6 +chr12 0 71364712 0 PB8 +chr13 0 66022136 0 PB6 +chr13 0 66022136 0 PB8 +chr14 0 56768832 0 PB6 +chr14 0 56768832 0 PB8 +chr15 0 45107015 0 PB6 +chr15 0 45107015 0 PB8 +chr16 0 49888550 0 PB6 +chr16 0 49888550 0 PB8 +chr17 0 61714821 2 PB6 +chr17 0 61714821 0 PB8 +chr18 0 58130413 0 PB6 +chr18 0 58130413 0 PB8 +chr19 0 56559549 0 PB6 +chr19 0 56559549 0 PB8 +chr20 0 46551277 0 PB6 +chr20 0 46551277 0 PB8 +chr21 0 43475551 0 PB6 +chr21 0 43475551 0 PB8 +chr22 0 62406302 0 PB6 +chr22 0 62406302 0 PB8 +chr23 0 48285470 0 PB6 +chr23 0 48285470 0 PB8 +chr24 0 46598214 0 PB6 +chr24 0 46598214 0 PB8 +chr25 0 51074589 0 PB6 +chr25 0 51074589 0 PB8 +chr26 0 36606979 0 PB6 +chr26 0 36606979 0 PB8 +chr27 0 45471964 2 PB6 +chr27 0 45471964 2 PB8 +chr28 0 36441234 0 PB6 +chr28 0 36441234 0 PB8 +chr29 0 21150118 0 PB6 +chr29 0 21150118 0 PB8 +chr30 0 33636744 2 PB6 +chr30 0 33636744 0 PB8 +chr31 0 26434322 0 PB6 +chr31 0 26434322 0 PB8 +chr32 0 38901021 2 PB6 +chr32 0 38901021 0 PB8 +chr33 0 26189703 0 PB6 +chr33 0 26189703 0 PB8 +chr34 0 42800126 2 PB6 +chr34 0 42800126 2 PB8 +chr35 0 25394646 2 PB6 +chr35 0 25394646 2 PB8 +chr36 0 32954182 0 PB6 +chr36 0 32954182 0 PB8 +chr37 0 31853191 0 PB6 +chr37 0 31853191 0 PB8 +chr38 0 16186061 0 PB6 +chr38 0 16186061 0 PB8 +chrX 0 93319721 2 PB6 +chrX 0 93319721 2 PB8
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/dpmix/misc.txt Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,11 @@ +state 2 agrees with: PB1 PB2 +state 0 agrees with: PB3 PB4 + +PB6: 360 SNPs where state 2 is as likely as state 0 +PB6: 12 SNPs where state 0 is more likely than state 2 + +PB8: 358 SNPs where state 2 is as likely as state 0 +PB8: 14 SNPs where state 0 is more likely than state 2 + +PB6: 0 = 83.7%, 1 = 0.0%, 2 = 16.3% +PB8: 0 = 87.6%, 1 = 0.0%, 2 = 12.4%
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/evaluate_population_numbers/evaluate_population_numbers.txt Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,2 @@ +CV error (K=1): 0.07423 +CV error (K=2): 0.07708
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/extract_primers/extract_primers.txt Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,1265 @@ +> Contig161_chr1_4641264_4641879 115 C T 0.323016 + + 1 TCCGAACCGCTAAATCCTGACGACTGTTCAGTGAGAACGGGnTTCCAGCTCAGTGGAGAC + >>>>>>>>>>>>>>>>>>>> + + 61 ACTCAGAGCTTATGTGATGCACCGTCGTGCCCGTGTCTGACTAAATGTGTTGCCAGAGAA + <<<< + +121 CAAAACGAAAGCCCCTATT + <<<<<<<<<<<<<<<< + +> Contig86_chr1_30984450_30985684 670 C T 0.031427 + + 1 TAATTCATGACGACTGCAGAAGGGCACTCAGAGGCAATTCTACTTGAGGATATTGTCTGG + >>>>>>>>>>>>>>>>>>>> + + 61 TATACTCTGTCCTTGCTCAGGACATCAGTGAGAACATAGAAACATTCACnTCCCCACACC + + +121 GAAAGCGTCTGTAGACCGGCCCACGGGCCGAAGTCTTTGCATTTCCTCTTGCCATGCACG + + +181 AGCATTCCCAGTGGCAATCAGGGGCCAGCCCTTCTGTTTGGCCTCTGCAAGCTTGTATCC + <<<<<<<<<<<<<<<<<<<< + +241 TTG + + +> Contig21_chr1_60697952_60699446 307 G A 0.507396 + + 1 TCTGGGGCCATGTTTCTGAAGTAAGGCTGTTTCTGCAGCCTTGCGGGCTGTGTCTTGCTC + >>>>>>>>>>>>>>>>>>>> + + 61 nCACCCCTTAATTCTTACCTGTAGGTGGTATTTGGTAGAGTGGAGTAAAACTGGAAACTG + << + +121 GTTCTCTGTGTTCCTGCATCT + <<<<<<<<<<<<<<<<<< + +> Contig64_chr1_87343284_87345672 163 T A 0.038702 + VspI + 1 ATGGCCAATTCTGGTTTAcGCATCATTGTTAACAACTCTTCCATTCATTCTCAGAATTTT + >>>>>>>>>>>>>>>>>>>> + + 61 CCCAATTCACATGATAAATTGTATGGTCACCTACcTACAACTAAACACTTAGTTTATTTC + + +121 TATTATTATTATTATTATTATTATTATTATTAnTAtTATTATTGAAATACATTTTTTTTT + + +181 CATAAACCGTTCACcCTTGTGAGAAC + <<<<<<<<<<<<<<<<<<<< + +> Contig20_chr1_110679280_110679687 181 C T 0.659726 + + 1 GAGCACTCAATGAGGGGTTCGACCCTTTGCAGACACAGCATGTAGGAGGAAGAAATGCAA + >>>>>>>>>>>>>>>>>>>> + + 61 cGGGGCACCCCTGCGGGGGCAGGCTTCCAGTTCAAACTGATCnGGTCTGGTCCTGGGGCC + + +121 GGGCCAAAGTTGTGGTTTCcCGCACTCAAGTCTCCAC + <<<<<<<<<<<<<<<<<<<< + +> Contig222_chr2_9817738_9818143 220 C T 0.092668 + SpeI + 1 AGATTTAGCTGGAGCATGCCTTTGCCCTTTTTAGCCTTTCCCTTTTACCTTTATCCTTCT + >>>>>>>>>>>>>>>>>>>> + + 61 TATTCTTGAAATGTTGAAATAGATGGAAGTATAGCAGCTATCTTGTCCCATAATGATGAA + + +121 AACCAGGTACAAAGTTGGTGAAAACTAAAAGAGAGGAGGAGCCTGGGTTCTTGGTGGCAT + + +181 
CATGAACACCTGCACnAGTCTAGCATGGTCTGTGCAAAATCTCCTGATCCAAGAAAAATA + + +241 TAAACATCCTTCTGTAGGGTTTTATTgCCTGAAGCAAAA + <<<<<<<<<<<<<<<<<<<< + +> Contig47_chr2_25470778_25471576 126 G A 0.289103 + Bsp1286I + 1 GCCAGGCGTCCCTCTTTTTGAGTTCtAATTGTGTACATCCAATCCCCATCTCAACAAATA + >>>>>>>>>>>>>>>>>>>> + + 61 GCTGAACCAGCTTCCTaTTTATTTGGTAGGTnAGCACTCTAGAAATTTGCTACACTGAAC + + +121 TCACCAAATTTATAATGTaAATTATGACCATTCTTTGCCATAATAATTTGGGGTAGGTCA + + +181 GATTTGGTTTTGGGGGCAGAAGAAATCATCATATCACAAGCATGTGACAGCTTCCAGCCC + <<< + +241 CATCTCAACTCCAAGAAATT + <<<<<<<<<<<<<<<<< + +> Contig6_chr2_56859179_56859956 671 T C 5.308026 + MspA1I + 1 TATCCCAAAGACGTGTGTCTCAAAGCCCTGAGGTTTACAGCCAAACATGATGGACTGCCC + >>>>>>>>>>>>>>>>>>>> + + 61 ATGACAAcGGATACAAATGCTAGCgTGGGTTTAATTATGCTAGAATTTTTATGATAATTA + + +121 TAATGATATTGTTATGAAGTATGCTAGGCTTTnAGCGGCTAGTCTCTAAACCTATTTTCC + + +181 tTATAAATCCTTTTATTTTTAGTGCACTATTTTATAGAATAAGAGGTTTTTCAGGAACAC + <<<<<<<<<<<<<<<< + +241 ATATATTGCATT + <<<<<<<<< + +> Contig163_chr2_76402959_76404830 221 C T 0.178077 + + 1 GCCCCTTCGAGTCCATCTTaCGCgCAGCAGCAGGAGGGATGGTCCCAACCACAAACCTAC + >>>>>>>>>>>>>>>>>>>> + + 61 CCGCTGCCTGAACGCTTnAAGTGCCCTCCGAAGAAAGCCCAACTCCACAGCCTGGCAACT + + +121 GAGGTCCTTGTGATCTTAGCTTCCTCTGCCCCACTCCACAGCTCAGCCTCACCgGACTCC + + +181 CGAGCTCCTTAAAGGAGCCCCCGAGCCCCCGCACATGCTGTTCCCTGTAACCGGGTACTC + + +241 CACGGCTCGTCTGTCCTTGGAGGCTCAGCTG + <<<<<<<<<<<<<<<<<<<< + +> Contig56_chr3_17326225_17327548 387 G C 0.224947 + AgeI,HpaII,MspI + 1 CAAAGGCAGTGATATGGGAGTGGAATGGAGAGGATGGGTGCCCCAGACTGGGTGCAGATC + >>>>>>>>>>>>>>>>>>>> + + 61 TGTTCTATCTGGTGTTTGGTGGCTGACCATACnGGTGAGAAGAAGTGTcCAGGTTTCTGG + + +121 CTTGATGATGCCGACAGTTATGGCAGGAAATGCTGAAGGGGTGCACATGAGCTCCTGTTC + + +181 ATTCTTCACTCTTCCTCTTCTACCTCCAACCTTGCTACCTGTGTGTACCCGACTC + <<<<<<<<<<<<<<<<<<<< + +> Contig108_chr3_46210055_46210874 367 A G 0.027845 + + 1 TTCACTCACCTGCTTCCCTGCTAACTGTCACCGCCCTCCCAATGCCTTAAACCAGCTTAG + >>>>>>>>>>>>>>>>>>>> + + 61 AAACACAAAATTTAAAAAACATTATGTTGAGACAAAAATATGTATAACCTGGAATATTGA + + +121 
ATAACAAAATGAAAGGGAAAATGATTCAAGAACACTTGGATAAGGAAAACTACAAATATT + + +181 nAAGATGTACCTTTGAACTTCCTATCACTGAAAGCAACCATGGAACCAGTACAATGTAGA + + +241 CCTTCTGATCTGACTTTCTTTTGTCTCTTGCTGCTGGGAAGTAGAATGCCCC + <<<<<<<<<<<<<<<<<<<< + +> Contig1_chr3_51588422_51589409 926 A G 1.147200 + + 1 AGATTATGGCCTGTGTTTACcCCAGCCTcGCAGAACATTTTACTGGGGACACCTGCCAGG + >>>>>>>>>>>>>>>>>>>> + + 61 TGGCAGATCAGAAGCCCGTGAGGCAGCCAGCCAATGGGAtGGCCAAAACCTAGGGCTTCG + + +121 TAnGGGAGGGAGATGTTTTCCTCgTCCCTCT + <<<<<<<<<<<<<<<<<<<< + +> Contig65_chr3_80727952_80728283 39 T C 7.077725 + + 1 CAAAGGCTTACTTTTTaGATCAACACTCTAAATTCTTAAnAAACAACAAAGCCAAATTTT + >>>>>>>>>>>>>>>>>>>>>>>>>> + + 61 CCTATATCATTGAGTAGTTGATACGTCTTTGGTTTTGCGCTAGCAGT + <<<<<<<<<<<<<<<<<<<< + +> Contig134_chr4_12145648_12148225 1326 C T 0.079565 + BalI + 1 AACCCAGAtCAGAAACGTCCCATGGCTAGTCATCTTCCTACACAGACTTCTgAGAGCCAA + >>>>>>>>>>>>>>>>>>>> + + 61 GCATCGTCAAcCGGCCAtTCTnGGCCATTCTCCCGAGCAGATGCTGCCGGGATAATCTGC + + +121 AGCATGAAGCCCTCCCTCGGGGGAGACCCGACcgGGTCCACACAGGTCTGTcTAGC + <<<<<<<<<<<<<<<<<<<< + +> Contig19_chr4_26233601_26233991 146 G C 0.163005 + DpnI,MboI,Sau3AI + 1 AATTTGGCTTCCTCTGGAGTtGTCCCTTAATGCTAGGTATCAAGTGCTGACAGGCCACAG + >>>>>>>>>>>>>>>>>>>> + + 61 ATnAGGGTAACACATGATTACAGGGCAACACACTGTAACACGTATTCCCTTGCCTTGTCT + <<<<<<<<<<<<<<<<<<<< + +121 T + + +> Contig17_chr4_61310346_61311158 267 C T 0.097708 + + 1 TATTCCAGACCAACCAAAAGGTCTAAGGAATAATAGAAGCTTCACCCACAGACCTGCCAC + >>>>>>>>>>>>>>>>>>>> + + 61 CCAACTTGAGAAACAGCACTTGCTTCCTCATAGAGTCGAAACGTCTTCGGTGGGTCCCCT + + +121 CCTGAAGCATCACCGCTACCTTTCCTCTTGGGAGTCACTGCCACCCnGAACTTGTTGCTG + + +181 CTTATTCTCTTTTATTTTTCTTGTTTTTGAAAGAACCCTGTCTTGGGTGTTAGGATAC + <<<<<<<<<<<<<<<<<<<< + +> Contig31_chr5_4734956_4736547 1166 C T 0.020932 + + 1 TGTTCTGCCATGCACACTTCTTCAACCCTTCAACCTGTGGGAGTCACCTCACATTCCCAC + >>>>>>>>>>>>>>>>>>>> + + 61 AGcGAATGGAATATCTATCTATCTgnCTTTAGGGATTTGTTACGTTTTCTTTTTCTTCCT + + +121 TTTCCTTCCAATATCTTAATGGGCAATTTTGTGGACAGTTGATAGAGACAACGTCAGGAG + <<<<<<< + +181 CTGTTGGCCTAGTAAA + <<<<<<<<<<<<< + +> 
Contig6_chr5_26899813_26900498 97 A C 7.369943 + AvaII,Sau96I,SinI + 1 AACTGAAAGTGAGAATTCTTTGTATTTGCTAGTCAAAAGGATTTCTAAGTCAAAAAAGTA + >>>>>>>>>>>>>>>>>>>>>>>>>> + + 61 ATTTGGGAnCATTAAGTCATATTTATAGACTAAAATTTCATTCCTAAAGACAATTTAGTA + + +121 AAAATGCTAGGCTTTCTAGAAATTTAACCTAACATAAAAAATTACAGTAAGTTTGCTAAA + + +181 GAATCACAGAGTTGACTGACAGTTTCCCAGGTTA + <<<<<<<<<<<<<<<<<<<< + +> Contig45_chr5_50892738_50892968 169 C A 0.496871 + + 1 TGAAAGGGGCACTGGGAATTATCAGAACCTTCTGGGTAATTAAACTGGGGAAAGCATAAT + >>>>>>>>>>>>>>>>>>>> + + 61 ACCATTTAGAAAAAGTTCAAGTGAGTCTTTTCCTTATTCTCCCnTGTACCCAGAAAAACC + <<<<<<<<<<< + +121 TGGACATGGTAC + <<<<<<<<< + +> Contig45_chr5_76133561_76134403 388 A G 0.038045 + + 1 CATGAGCATGCTGTCTGCACAAtGGGAGCACCCGTGATGTGAGAGTAGCCAGGCCACCCT + >>>>>>>>>>>>>>>>>>>> + + 61 GGCTTGAGTGCTTTGTCCAAAAGGCACAATGGGAACTACACAGAAACAATCAGATTCACT + + +121 GCCTTCGAGGGTTTGAAGAAGACAGCTGAAGAGTAGGAGGTAGAAnCAAAAAGGCATGAG + + +181 AGGGGGAAGCAGAGGCTGCAAGACATGAGCTGGGCAGTACTGACgGGCCACACAGAGCAC + < + +241 TGGAGACAAGGTCAGGAGCCCT + <<<<<<<<<<<<<<<<<<< + +> Contig111_chr6_5821219_5822519 1060 A G 0.230765 + AvaI + 1 CGTCAGAGCTGTCTTCCCTCCAGCCAGAGGGGCCCTGAGAAGGAAGGGGGCTGAACCCAG + >>>>>>>>>>>>>>>>>>>> + + 61 GCgCCAGCCCAAGCTGCAGCGTGATCTGGGGGTGAGGCCCCCCGCTGCACAGGGGGCACG + + +121 GGGGTTCGGGCAGAGATCGGCTACCCATGGCCGGCGAGGCCACAgTGGCAATGGGCAGCC + + +181 AGCCTCCGACCAGCcGCCCCCnAGCTGCCTATTTAAGTCAGGAGCTTCTCCTTCCCgTGG + <<<<<<< + +241 AAGTAGAGGACAAATT + <<<<<<<<<<<<< + +> Contig102_chr6_30271329_30271577 39 T G 1.158547 + + 1 TCTTCCTTTATGCATCAGGGCAGCACCCTGGGGAGAAGnGGGGGGGACAcGTGTGTCCTG + >>>>>>>>>>>>>>>>>>>> + + 61 GGGAAAGGGGTGTTCCCACTCCCTGCAATGCCTTCCCCCGCCCAGACCAGCAGTTCTCAG + + +121 TCTTGACTGCATGGACTCTCCTGGAAGGCTTTAAAAAATGTGGAGGCCGAGGCTTACCCA + + +181 tGACGGTTCTGACTGAATTGCTCTGGAGTAGGGCTTAGGCACTG + <<<<<<<<<<<<<<<<<<<< + +> Contig112_chr6_51024554_51024851 100 A G 4.286925 + + 1 CTTCATCATACCTATCATTGCCTATCGTTATACTATAGAGGTATTGTTCATTCTTTTTTA + >>>>>>>>>>>>>>>>>>>>>>>> + + 61 
TAGACTCATTGAGTAAAACTCAGGnCATGAGGGAAGGAACTTTGTCTCTTGTGCAATTCC + + +121 CTATCCTCAGTCCTTAAATATATGTATGCTAcCCAATAGGCACCAAATAT + <<<<<<<<<<<<<<<<<<<< + +> Contig84_chr7_6648683_6650255 1297 G A 0.165637 + + 1 GTTAGTTGTGAACACTCCCCAGGTAAACTGGTGTAACTCTTGGGGCAAAGCATGGAGTCT + >>>>>>>>>>>>>>>>>>>> + + 61 ACCCAAAAATGTAGAATTCTGCAGAGACAGCTGTTTCTTGGTTGGGTTTCTAGACCAGAA + + +121 AATAGAAGATTATAATTATGGGTGGGAAATATATGTGCAAAAAAGTATAAAAGAAGAGGA + + +181 ACAGAATAAAnGGAAATGGAAATGTTTGTAATTGATAGGGATGTGGATGTAAATGCCTGG + <<<<<<<<<<<< + +241 CAGAGAGGAGG + <<<<<<<< + +> Contig206_chr7_26281823_26282074 103 C A 0.947486 + NheI + 1 ATCCACATTCGCACAGCTCCTAATATAATATTTCATTGTTAAAATACTTCTGATTGGCCT + >>>>>>>>>>>>>>>>>>>> + + 61 AGGACATATTTTTACAACTGCCTTGACTTCAATTGCTAGnAGTAGCTTGCCAAAGAgGTG + + +121 CTTTAATAAAGGAAATTAACTTCTTTTAATATGTTGACTGATATACCAAGGTTTTAGTGC + + +181 TATTAGTTTACCTTCCCCAAAAGTGCTTA + <<<<<<<<<<<<<<<<<<<< + +> Contig38_chr7_50681997_50682600 42 T C 0.145997 + + 1 TAGAGCTCTCAGCATCCAAGCAGAATCTACTGGGTCTGACTGnGTTCTGCTCTGTCACTG + >>>>>>>>>>>>>>>>>>>> + + 61 GAATGACATTTCATTGCAGAGTACTCCTGCAGTACAACCAGGGCACAGCCTTTAAATTGA + + +121 CCATGTCCCCTGGTCTaCTCTGCTGAGCTaTGCACGGGTCCCTTCTGGTTCAAACACAGA + + +181 CTGATACAGCTCAGATGGAAGGGAGGCAGTTGCAGAGAAACAAA + <<<<<<<<<<<<<<<<<<<< + +> Contig91_chr8_12804505_12805470 409 C A 0.175272 + + 1 CTGTTTTCAGGGGCTACCTGCTATCTCCAGAACATGCCTGGCTCTCCTCCAAACACTGTT + >>>>>>>>>>>>>>>>>>>> + + 61 CAAnCTGACCAAAGCAGAGAGCTGTATATGGACCACACATACCAAAAAAAAAAAAAAGAC + + +121 AGTCCACACCCTCTGTATAATTATATGGTACAAATAATAGAGTTTTTGTTAACTACCAGC + + +181 TCTTTTTACAAAGCCTATCAAgTATCATAGACAGTATAATGCTGTGATTGCATCTGTGAA + <<<<<<<<<<<<<<<<<<<< + +241 CC + + +> Contig8_chr8_27811135_27812620 333 C T 0.272485 + + 1 CTTCAAGGAAAGGAGGCAGTTTGGACAAGTCAAAAAAATCCCAAAACtTTGTACTATATA + >>>>>>>>>>>>>>>>>>>> + + 61 AATCTGGCATATTTGTTGATGACanAATTGAGTTAGAAGCAAGAGTCAGAAGCTGACTTT + + +121 CATGCTGTTTTTCTGTTGTTTTCTGCGGCTCCCCTATGTACTAGTTCTCTTCCgGTGTGC + + +181 TGACAACTTCCAACTTcTCATAcCCTCTGCATTTCACGTTCTGC + <<<<<<<<<<<<<<<<<<<< + +> 
Contig17_chr8_57490059_57490498 69 G T 0.522227 + BglII,DpnI,MboI,Sau3AI,XhoII + 1 CACCAGAAAACAGGCATGGAACAGATTCTTTCAnATCTTTAAGAACAAACCAGTCCTGCT + >>>>>>>>>>>>>>>>>>>> + + 61 GACACATAGATTTTTGGACTTTTGGCCTCTGTAACTGTGAGAATAAATTTCTATTTTAAG + + +121 CCATCTACTTTGTAGTAATTTGTTATGGCAGCCCTGAGAAATTA + <<<<<<<<<<<<<<<<<<<< + +> Contig73_chr9_29451535_29452248 616 A G 0.448230 + Eco47III,HaeII + 1 ACCCAAGAGTCTGAGAGGCCCAGAGGCAGCTGGAGGCTGGAGGAGTCCCaCAGGCAAACC + >>>>>>>>>>>>>>>>>>>> + + 61 CTCCATTCCATGCGCCCCAGGGAGGCCAGGAAATCAGCnCTCCCAGGAGCAGGGAAGCAG + + +121 CAGTCCCTGGCATTGCCAGGGCAAGTGGCCACTCAGGGGAGAAAGGGGTGAGCTGGGGAG + + +181 GGGGAAGAGGGGAGGGGAGGGAAGGCAGAGACGAAGAGAA + <<<<<<<<<<<<<<<<<<<< + +> Contig96_chr9_39008495_39009278 215 A C 0.426539 + SspI + 1 TGGCAAACTCCTTGTGAATGCCACTACACTTTCTGGTCTCTGTATGTAATGCTAGATATT + >>>>>>>>>>>>>>>>>>>> + + 61 ACTGACACTTACcGCTACAAAGGCAAGACAAGCAAGACAACTGACATACACCCAgGTATG + + +121 GATCTATGAAGGGAGCTCCTTCTGCTAGAAAACAATATGTAAnTATTTACaTAACACCTA + + +181 CAATTCTAAATGGGTAGTTTCCCACATGTGAGATTACATCTTCAAGAGCCAAAGGACAAT + <<<<<<<<<<<<<< + +241 TTGTGCATC + <<<<<< + +> Contig22_chr10_15505382_15505589 172 T C 2.860867 + + 1 CGCAGGCGCCCCAATTATTCTTAACTCCTTATCAAAAGTTTTCCTAATTGAAACTTAAGC + >>>>>>>>>>>>>>>>>>>> + + 61 ATCACCTGTTTATTTCCTCTAAAATAAATGTATACATATAGAATTTCAGTAAGATAATGT + + +121 CTCAAAGAAGATGATAGCCATGGGAGAGGCTTATATGTACTTCnTATAATAAACAACGTC + <<<<<<<<<< + +181 CAGGTGTGATATT + <<<<<<<<<< + +> Contig69_chr10_40547265_40548153 371 G A 0.137642 + Bsp1286I,CfoI,HaeII,HhaI + 1 AAGGGGAAGAACTGAAGCGAGTGAGAAGCACGGAAGGACTTTTAGGTTTACAGCTGGGGT + >>>>>>>>>>>>>>>>>>>> + + 61 CACTGGTCTTCGCTATGGATGCCTCTCTTAAAGGAAAGACTAATTCTCTGTGGGTACTGA + + +121 AGGTGgGAGATGAATGTAGATGGGCnCTCGCATGTGTCAATGCTGACGGCTTGGTGAGAG + + +181 GTTTGGTGCGAGGCCAAAAAGGCgGGGATgAGAGAAGGATGACCTAGGGAGACTGCAGGG + + +241 TATTTAAAAGTTTGGGTCCAATTTTTCTCAAAGTGTGGCCAGTGCAC + <<<<<<<<<<<<<<<<<<<< + +> Contig9_chr10_51475063_51476054 770 C T 0.393903 + + 1 GTCTTCCTTCTAATCCCCaAGCcGTGAGAAGCTGTCTGAGCGCTCCTTGCTGGGCGTCCC + 
>>>>>>>>>>>>>>>>>>>> + + 61 TGCATGCCTGTACTGGGGCACACCTACGCCCTGGGTCCTGCTnCTGAAACGGTGTCTCAT + + +121 TTCTGTAATCGCTCCAAGCTTAATGGCTCTCAGCCTTGTGGGTTGCAGTGGAGAGAAAGC + <<<<<<<<<<<<<<<<<<<< + +181 ATT + + +> Contig72_chr11_7142765_7143772 146 G A 1.137400 + + 1 GGTGTAGTGAGGCTTCCACGAGCAGCCAGGCTTACAAACTCATCCTTAGCCTAAAAACTC + >>>>>>>>>>>>>>>>>>>> + + 61 CACnAAGTCAAGTATCTTGTGGGTGTTGAAAACTGTTCCACTCTGCAGAGCACCTCTATA + + +121 TGAAGTAATAATCATGGTATAATGTCCTTCTTCACATACCTGCCAAGAT + <<<<<<<<<<<<<<<<<<<<< + +> Contig7_chr11_40017076_40017630 352 C T 0.336170 + + 1 TCGGTCCTTCCTTGATCACATCTCCATGATCCTCCCACTGTTACTTGGAGGAGAATTGTT + >>>>>>>>>>>>>>>>>>>> + + 61 GCTTCCACAAATCAGATCTCTTTATTTTTCATTTATTCAACAAATGTGGACTGAGCTCTT + + +121 TGtATAGTACATTCTGTGGGCACTATTCACTAGACACACTGTAAACACTTCTGCTTCCTG + + +181 ACTTTGTTCAGATCTACCCCCnTGCCTGATCTGCCCTCCCCACCTGGTTTTCATCTCAGG + + +241 CTTAGGTCAAGCCTCATtTGCACTTC + <<<<<<<<<<<<<<<<<<<< + +> Contig16_chr11_53408448_53408790 187 A G 1.366749 + + 1 ATATTGCCAGTTTTAATGGGTGATATTTAGTCCTCCAATTAGACCTCTTTAGTGCATTGG + >>>>>>>>>>>>>>>>>>>> + + 61 ATACCAGTGAGCAATCATTCTGACAnAATTTCTGCTGCCTTGATTTTTGTGACAA + <<<<<<<<<<<<<<<<<<<< + +> Contig21_chr12_18403415_18404381 586 G T 0.068025 + + 1 AGTTCCAATGTCAGAGTCCCTCCCTCTACCTCCTATCCAACCcGCTACTTTTTTTnTTTT + >>>>>>>>>>>>>>>>>>>> + + 61 GTTTTACAACAAAAATAAACCTTCTTGTAACAATTCCAACAATTACAAAATAGAGTAAAA + + +121 TGTTTAAGTCTCTACCTAAACACACTCATCCTCAGAGAAACTCACAGGTAATTTCGGTtC + + +181 GTATCTTCCCAGACCCTCTTCTCAGcTTTCACACATACTACATACACATGAACTTCGAGC + <<<<<<<<<<<<<<< + +241 TGGCTGTT + <<<<< + +> Contig41_chr12_25565452_25566993 475 G T 2.230501 + + 1 TTACATAGCcAAGTGGGAAACAAAGcTACATTTTTnAATATTAATAAATCTGTTTTTTTA + >>>>>>>>>>>>>>>>>>>>> + + 61 AAGGGTTaTTATACAATATTATCAAACTTCTTGTGAATGTCAGAATCCAGAACAAACCTA + + +121 AAATCAGTAATACTTGGGAAAGACGCAAATAGTCCCTCTTTCCACT + <<<<<<<<<<<<<<<<<<<< + +> Contig5_chr12_53880670_53882675 1221 A C 0.061001 + + 1 AAGCCATCCATGTGTGTGCTTTCATAATATATTATGACAGGAGATTAAATTCTAAGTAAA + >>>>>>>>>>>>>>>>>>>> + + 61 
GATTAGTCCCCAGTACAGTAGTGTAAAATAGGACTTTTCTCCCTTTTTCTCTCCnCGATA + + +121 TTCCAAATCAGAGTTTGGCCAAAAAgAAGTCAATAAGGACTTAcAAAAAAAAAATCTCCA + + +181 TTCACTGAAAGTAGCTTGCTAGCATTTTCCTTTCTcCTGATGTTGCTCCATAACTTCAAC + + +241 CCTTTTTAAAACTGTCTACTGTGGGGTAGACAGAAGGCGTGGTCGTGAGGTAAAGGTCAA + <<<<<<<<<<<<<<<<<<<< + +> Contig107_chr13_26045881_26046290 341 C G 4.509990 + + 1 CATAGATTGCCTTTTCCAGTCcAGAAGTTTAGAACAGACTGCCCTGAGATCATGGTGGGA + >>>>>>>>>>>>>>>>>>>> + + 61 AATATAATACTCATTAGGTTgTTGAAATTCTTGTAGGAATGGAAGAATTTCAGCTTAGGC + + +121 ATTCTGCTnCTGTATTCCCAGATTACAGTGGGAACTGTATGAAA + <<<<<<<<<<<<<<<<<<<<<<<< + +> Contig251_chr13_28498333_28501066 864 T G 0.067573 + + 1 GCCCTCTGGCTTCTGTTTGGGAGGTAGGGCGGGTGGGCAGGAAGGGAGGACGGTCGGGGT + >>>>>>>>>>>>>>>>>>>> + + 61 ATTGGTTCnCCTCCTCCtGCTGGGTCCCAGATGGATACAGGCCAGGTCTG + <<<<<<<<<<<<<<<<<<<< + +> Contig55_chr13_53467708_53468101 221 T G 5.717222 + HinfI + 1 AATACGGTGAAGAGCAAATGAGAAACATTTCTTCAAACATTTGTAAAGTGAAAATATTTA + >>>>>>>>>>>>>>>>>>>>> + + 61 AAATGAAATAGATnCCAAATTTTTTCTTCCAAtGGATTATCTACTGGGTTCTGAATATCA + <<<<<<<<<<<<<<< + +121 CAAAGACAAATG + <<<<<<<<< + +> Contig48_chr14_11839435_11843272 3014 A G 0.907583 + + 1 GTGCTTCCAGTCAAAGGGGAAAACTTGATAGACAAAAGTTTGGATTTTTTTTTTTTTCCT + >>>>>>>>>>>>>>>>>>>> + + 61 TCTCCTTGGGAGTATGTCTGAGTTACCGTTTTTAGTTTTGATCTGTGGAAAAAGTGATTA + + +121 TATAGGTTCCAAATCTTACTTTTCCCTTTTTGTTTTCAATAGACTTTTTGTGATCATTTC + + +181 AnCATAGTTTGTATTATTAAGTAGGGGTTTTtTTTTGTTTTGGTTTTTTTGTGGTTGTGC + <<<<<<<<<<<<<<< + +241 GTTGTAAG + <<<<< + +> Contig28_chr14_26905747_26909514 975 G C 0.116622 + AluI + 1 CTGGTACGTGCTTCTCCTCCTGCAGCCCACCGTTTACTTGGTAAGTCGCTGCCGATCCGG + >>>>>>>>>>>>>>>>>>>> + + 61 CGCCCCCGCAATCCCACCCTCGTCGCGAGGACAGACAACCAGGGGCGCGCGGGAGGAGGG + + +121 TGAGACCGCCAGTTCAGCGGAGCAGCGTTCCTAGCGACCGTGTTGGAACAACTTTGGCAA + + +181 nCTGGTCTTTGGATCCCTGCGGGATTTTTCGGGTTTCCCACCCTCATTTCTTGCTT + <<<<<<<<<<<<<<<<<<<< + +> Contig64_chr14_56768376_56768902 473 C T 8.281311 + + 1 ATAAGAATCTCCTCAGTAGAGAGAAGCCTGATCTACCATGATTTTATTTGAGTAAAACCA + 
>>>>>>>>>>>>>>>>>>>>>>>>> + + 61 TTGAAACAAACAnTTCAAGAAAGATGGTCAGAGAAGCAAAATGTAA + <<<<<<<<<<<<<<<<<<<<<<< + +> Contig60_chr15_18493036_18494316 150 G A 0.125024 + + 1 CGCCTGGAATAGCATGGTGCCTTTAGGAAATTACATCTAACTCTCTAGGGCTGGAAGGAA + >>>>>>>>>>>>>>>>>>>> + + 61 CACTGAGTnAACGTAAAGAATTGTGGGAGAGAAGCCTTTAGTTAGATCATGCAGGGCtCC + + +121 GTGCTCCAAATGGGCTTTGTGTTTTG + <<<<<<<<<<<<<<<<<<<< + +> Contig112_chr15_26772864_26773267 374 C T + +> Contig119_chr16_6160274_6160477 180 G A + +> Contig60_chr16_28079136_28080263 588 T G 5.998983 + NsiI + 1 TTAGAGAATTATTCACTCCCCCAAAAGTAATAAAAATATAAGAAACAAAGCATAATCATA + >>>>>>>>>>>>>>>>>>>>>> + + 61 ATGCAnTGGTTGAGTTAGTAGTAAATAACATTTTAGGGTCATAAATTAAAAACTGAATTG + + +121 AGATTTAGCTGGAAATTGTGATATAAATGTCAGGATAAGAGAAGCAAGATTGAAAGAAAG + + +181 ATGGATTAAAAATGCTAAATCCTTCTCTACTATTACAGGAAATTGATAAAAGAAGAGAGA + <<<<<<<<<<< + +241 GGAAACAGCACATAT + <<<<<<<<<<<< + +> Contig31_chr17_12128267_12129637 205 G A 0.246305 + + 1 TGGAGGCAATGGAGGTGAATGAGCCCCAGTCCTGGACCTCgAAGCAGACTGGCCAGAGAC + >>>>>>>>>>>>>>>>>>>> + + 61 ACCAGGATTTAAGGCATGTGATGAAGACACAGTTCAAAGTGACGAGCCCTGCAGACTCTT + + +121 CnGGAGCAGAGgTAGAGTGATGACCCGTACCTGGAAGGTTTTAGGAAGGATAACAATGAA + < + +181 TTTACCAGAAGGCAGGGGTAGA + <<<<<<<<<<<<<<<<<<< + +> Contig99_chr17_26021506_26022200 505 C T 0.171977 + RsaI + 1 TGTTGCCATGTTGCCAGTATGTTTTTTTAAGTTTTCCTTTTTAATTTCATTTATGATATT + >>>>>>>>>>>>>>>>>>>> + + 61 TTTTGGAGTAnaGAAGTTATCATTTCACATGATCAACTTTTCAGTCTTTTTCTTTATAAT + + +121 TTTTAAcTTTGTTGTCATGTTTAGAAAGGTTAAATTTATACCTTGTAAAATAcCTTCgCA + + +181 AATTTACATTTGGGAAATTATTAGTAGTATTATTTcAGGAAGTTaTTATTTTTAAGTGTT + <<<<<<<<<<< + +241 GGGTTCCCGTGA + <<<<<<<<< + +> Contig27_chr17_61713766_61716585 1056 G C 2.199527 + Eco47III,HaeII + 1 AAGAGGCGCAGGAAGGAGAGTCCGCCcGCCGCAGCCCGCCCGCCGGCTCCTCAGACAGCn + >>>>>>>>>>>>>>>>>> + + 61 CTCGCaGGTCCTCCAGCCTTCCAGCGAGAAGAAAGAAAGAGCGTCACCGGAAACCACCGA + <<<<<< + +121 AACTCTGGGGTAGAGCG + <<<<<<<<<<<<<< + +> Contig229_chr18_3706523_3708577 1076 A G 0.444778 + + 1 
TTTAAACTCCCGTGTCTGTGCTTGATTATGGCACCGTTAcTCTCGGACGTATTTAATTTT + >>>>>>>>>>>>>>>>>>>> + + 61 CTGATTCTGATTCATTGGTCTATTACATGAGCAATTGGTGGnAAGTGATGTCTGTCTGTG + + +121 GCCCTTACATTATTTATAATAAAACTCTCCTTCAAAGAACCTTTGGACGATGTCTCCACA + + +181 ATTACAGAATGAGTACAAATTAGTTTTCTAAAACAGCAACTGGTGGTTAATTAAGTTTTG + + +241 TCATGTTTTCTGGAGATGAGTGTCTCATGGTTTGGATACTATGAAGGCATTTCTGCAAGG + <<<<<<<<<<<<<<<<<<<< + +301 TT + + +> Contig82_chr18_27305489_27306229 566 C T 0.348750 + + 1 CCTGAGAACTTCAAGCTCAGCGGAGGGCTGAAAGGGAGGTAACCACTTTTGTACTAAATT + >>>>>>>>>>>>>>>>>>>> + + 61 GTCACCTCCTTGCTTATTTTCGTGAAGTTCTAAAGAACACAACTATCTCACTAACACAAc + + +121 AGATTTATTATTGAGTTGTCAGAATCAGCAGCTTTTAGTCACngGTCACTTGTGTGCCTC + <<<<<<<< + +181 CACTCCATCATAACT + <<<<<<<<<<<< + +> Contig64_chr18_55979770_55980315 49 G A 2.123800 + + 1 CCCCAAGGAGACAGGAGGGCAGGCTGTGTGGGTTTCCTGGCCCGCAAnCCCTGTGCAGGT + >>>>>>>>>>>>>>>>>>> + + 61 GCGgTTCTGCCAGGCCCGCAAATCTCGGTCTCACTTAACTGCGGCATCATTTATGCTAAT + <<<<<<<<<<<<<<<<<<<< + +121 G + + +> Contig146_chr19_5221790_5223013 143 A G 0.869806 + + 1 TTAGAATGGCTTTTTCACGGAAGGAGATGAGTTATAAAGTACGGgTGACATTTTTTTGTC + >>>>>>>>>>>>>>>>>>>> + + 61 TTGnGTTTTTTTTTTTTGTCTTGTTTTTAACTGTTGTTTAAGTCAGCCAACAAGTACATA + + +121 ATTTCTCAGCCCACATTTAAAAATTATCAACTCATTTTCACTTGGAGGTGTGGACATAAA + + +181 GCCATAAATATAATTTGCATTCTGCTGACCTGTTTC + <<<<<<<<<<<<<<<<<<<< + +> Contig129_chr19_25541958_25542221 202 T C 2.550968 + HinfI + 1 AAGAATCAAGCATGCATTCTGCCTTCCCCATGTGAAAAGTACCAGGTGAGGATATGTACC + >>>>>>>>>>>>>>>>>>>> + + 61 TCTTTATATCCATGTTCCAAGAACAACAACAACAACAAAAGAATGAGAGTnACCACTTTA + + +121 CAACCCCCAAAGAATTAATGGATT + <<<<<<<<<<<<<<<<<<<<< + +> Contig60_chr19_54013816_54014398 281 A G 1.271267 + + 1 ATTTCTCTCGCCGATATTGAGGTTAAGTATCCCTCTAGGCTAAAAGACCAGCAGCTTTTC + >>>>>>>>>>>>>>>>>>>> + + 61 TTAAACCTATTACAGGAATCCCAATAATGGAAAGAAACGAGGGGAGGCAGTGCTCATGTC + + +121 ACATTCTTCCAGAAATCAAATATAnTGGGTTTTTTTGTTGACGTAAATACATAGGTTGGA + + +181 AAAAAGGTAGGGGGAAAGGAAAA + <<<<<<<<<<<<<<<<<<<< + +> Contig50_chr20_12138509_12141975 3206 C A 0.383804 + 
+ 1 TCACACCAGGCTCAAGGTTAAGGCAGAACACAAGATAAGAGAGCAAGCTGGCTTCCTGTC + >>>>>>>>>>>>>>>>>>>> + + 61 CCCCAGCTGGCtTnCCCATGGGAGCAGAAGCTGGATGGGTGCAGCTGCTGGCTAGGGATC + + +121 CTGTAAAAACTGAAGACCTCCaGTCTCCAGGGCTGGAGGaGGGATTCCTGCCCTGGGGGC + + +181 AGGCCaGATGAGAGGGATGCGATAATGGCAGGTGTCTCCACAAGA + <<<<<<<<<<<<<<<<<<<< + +> Contig36_chr20_32631363_32632049 176 G A 1.149790 + + 1 CTGCCCGAAACAAGTTCCTCATTGTTTCCTnCGTTCTGTGCTGTGGCGGTTTCTTCCTGG + >>>>>>>>>>>>>>>>>>>> + + 61 ACCCAGAGTCCTTTTCcGAACATTAGCAACTCCATTATGCCACACAGAGATGAGATTTGA + + +121 GAAAGGAAAATAAAGTTGTCTCGTGATATGGAGGGCAAAGCTGATAG + <<<<<<<<<<<<<<<<<<<< + +> Contig50_chr21_4178523_4178687 121 G A 0.483377 + + 1 GTAGGAATCTCAAGCCCCAATCTACTTTTCAGGAAGCTGAGGCTCAGAGAAGTAAAGTAA + >>>>>>>>>>>>>>>>>>>> + + 61 CATGCTCAAATTCACACCAGTAAGTGAGAGAGTTnTAAGTAACTATAGTAAGTGACAGAG + <<<< + +121 CTGGGATTTGAACCCTCAT + <<<<<<<<<<<<<<<< + +> Contig129_chr21_31045749_31046924 381 A G 0.028026 + AcyI,Hsp92I + 1 CAGCTGAAGCACCCTCTCTGACCAAACCTGATCTTTCTTTTGGGGATCCTTGACnTCTCA + >>>>>>>>>>>>>>>>>>>> + + 61 TAAGTCTTTATGAACCATTTGTCCTTCCAGCCATCATTTCCTTCAAC + <<<<<<<<<<<<<<<<<<<< + +> Contig159_chr22_7896450_7896974 109 G C 0.465232 + + 1 TAACTGAGTGATAGTGCTTGGcGCAAGACACTAGCAAnCCTGTACTCACCTTCCATTCAT + >>>>>>>>>>>>>>>>>>>> + + 61 TTATGTCATAATAATAATTCTTTAAATATGGAAAGcGTAgAAACAAAATAGGAACACTGC + + +121 TAAGTATTCATTTAGGTAATAAGTTTAGTGCTAGATGTGTGACAGGAATTATTTTcATTA + + +181 ACCACAAGCAAACATTTATGGAATGTCCATTGCATGCTGAAATGTA + <<<<<<<<<<<<<<<<<<<< + +> Contig23_chr22_34612023_34612568 167 C G 0.409430 + + 1 TATTCTACCACTCAAAGCCAGCCTGAAGGAAnCCTGGGcTCTTTCCATCAGCTATCTGAC + >>>>>>>>>>>>>>>>>>>> + + 61 AAGTTGATCTAAAcgTGTAGAAAGCATGCCTGGCTCCACACCTGATTTCATGTGGAGCCA + + +121 TCAGCTCTCACACGATCACCTTG + <<<<<<<<<<<<<<<<<<<< + +> Contig26_chr22_57817664_57819633 1453 A G 0.471213 + RsaI + 1 TGCcCACCCACATCAcTGAACAATTCAGAGAAGATTCCTTTAACATATGCATTCAATGTT + >>>>>>>>>>>>>>>>>>>> + + 61 TAAGCCTCGCTAACATTTTTTAAGCACCGAACCTTTTTAAAAAGGGCTCTAAAAAATAAG + + +121 
CATGAAACTAAATCTCTCTAATACgTCACGTGACACACATGTAtATAACCCAGAAGGTnC + + +181 ATCTAGGGAAACGCAAAAGGAATTATG + <<<<<<<<<<<<<<<<<<<< + +> Contig133_chr23_3525134_3526502 1223 A G 1.358849 + + 1 TCCTATTTTGTCCCCAAGTCCCAGGTTCAGGAGCTCCATTAAGTCACAGGTAATTCAGCT + >>>>>>>>>>>>>>>>>>>> + + 61 GAGAGCCTGCAAAATGGCAACCCCACCTGAGGCACTTTCTTTAAATCAACTGTATCAAGG + + +121 TAACATTTACACAGAATAAnAAGCACTCATTTTAAAGAAATAGCTTGATGAGTTTAGTCT + + +181 AATTGTATCTGTGTAGCCACcACACAGTCAAGATa + <<<<<<<<<<<<<<<<<<<<< + +> Contig35_chr23_28447813_28449115 70 T A 0.163155 + DdeI + 1 CCTTTTCTCTCCATTCACACCCCATCCTTCTTnGTCCCTCCAAAACTCCTAGCTGTTTCC + >>>>>>>>>>>>>>>>>>>> + + 61 CATTTTAGGGTCTCTGCATTTGCTGTTCCAAGCAAGCTCTGCCCCCAAATGATCTGGTGG + <<<<<<<<<<<< + +121 CTTGTTCCCTC + <<<<<<<< + +> Contig50_chr24_22515247_22516072 761 C T 0.190253 + + 1 GGGGAGACCCTGATCCATCCTCATTCTACTGCTTCCCGATGTCCCAGGCCTGCtGTTCTA + >>>>>>>>>>>>>>>>>>>> + + 61 CACGAAAGCCCATTCngTGCCTCCAAGTAGGGAGCAGAAGGGAAGAACACA + <<<<<<<<<<<<<<<<<<<< + +> Contig84_chr24_29196623_29199644 466 C T 0.214603 + + 1 TAATTGGACaCTTTTGACTTGCGTTTCATGATTTTGCCCCATTTTTCTCTGCnGCAATTT + >>>>>>>>>>>>>>>>>>>> + + 61 GGCCAGTGATTCCTGTCTTTCCCTCTATTATCCACTCTGATTAACTCAGCTGCACCTGCC + <<<<<<< + +121 AGCCTTTATTCCTGCA + <<<<<<<<<<<<< + +> Contig144_chr25_4011170_4013134 541 A G 0.086768 + + 1 GGTGAGGtGGAGAGTGGCAAGAGCTGTTGGTGGGCGTGTGTGAGCCAGAGGGCAAGCGGG + >>>>>>>>>>>>>>>>>>>> + + 61 GAGCTCCTAACTGCAAcATCCAGGGGCAGTCGATACTGCCTGGGAAGTAGGAACTGCTCT + + +121 GGAGCATGAGTGGAATTAGCAGATGGATAACAAGGGAGnGCGAcaAGGGCATTTTATGAA + + +181 GATGGAACACCTTGGAAAAGATCAGATTGCTGAAGCATCCGTTTGAGAAAGCACAGATAA + + +241 CTTTTCAAATCTGAAGAGGAGGGACATGACGGGGAGATGAGACTAG + <<<<<<<<<<<<<<<<<<<< + +> Contig103_chr25_38891221_38892140 407 G A 0.166581 + Alw44I,Bsp1286I,CfoI,HhaI + 1 GTGGGATGCAGGTGCTGTGTCTACCCACTTCTTCCGGGGACCAGCCCCTCTCTGGCCACA + >>>>>>>>>>>>>>>>>>>> + + 61 CCCACTTCCTCTCATCTTAACTGTCCAAATTTGCTGACTCAAAGGGATGTGTGTGCGTAT + + +121 GTGTGTGTGTGCnCACATGTGCATGCATGTGTTTTGTGTCTTTCACTCTCAAAATTATTT + + +181 
AAGTTCCCATGGCCCTGCCCTGATTTATCTCCCAAAT + <<<<<<<<<<<<<<<<<<<< + +> Contig204_chr26_4311195_4311778 170 C T 0.085422 + + 1 AACAGAAGCCTGTCCCAGCTACAGGAGGGAAACGGGCTCGGCAgCgTGGCACTGCCTCAC + >>>>>>>>>>>>>>>>>>>> + + 61 tGTCACCCCCAGGAGCCCGGGAAGCCGTCCCTTGAnTCCTCAGTGACGGTGACCATGACC + + +121 AAGGGCAGTAACTCTGCCCGCGGGACACAGCgCTCCTGCTCCCgACgGAAGGTGTGCCGG + + +181 CCACAGAGCGCACGTTGgGGCCgAGTTCAGGGGCAGAtAGGAAGACACAGGa + <<<<<<<<<<<<<<<<<<<< + +> Contig146_chr26_26622638_26623906 574 G A 0.318381 + + 1 TTTCTGAGATCACACAGCCAGGAAATGGGGAGCCAAGATTTGAACCCAAGCCTGTCTGAT + >>>>>>>>>>>>>>>>>>>> + + 61 TCTGGAACCTGCACCAGAaCCACACCTCAgCCCTGCCTTCCCTTGGAAGGCTtACcnTTG + + +121 TGCCTGGAACATAGTAAGTGCTCAAAAAATGGTcTAAATCATCATCGTGTaTTAGGAAGC + + +181 CTGGGTCCACACCCCTTGGGCTGTGGAGTGTCTTGAG + <<<<<<<<<<<<<<<<<<<< + +> Contig135_chr27_6853874_6854079 158 C T 0.060201 + + 1 AAAGGGTTCCAAGTTACGGGATTcATACGGGAAGGCTCCcGAAATAGAAATGATCGTTGT + >>>>>>>>>>>>>>>>>>>> + + 61 AACATGGGGAGATTTGTCAGGGACAgACAnGAACTGTCTTATAAAATGCAGCCCAGTTTT + <<<<<<<<<<<<< + +121 CTTcTTGAGA + <<<<<<< + +> Contig64_chr27_34654435_34654621 132 C A 0.296658 + RsaI + 1 AAATTGGTCAGTGACTGGGAACACGTTCCGAACCAGCTCCGTGGATTTACAAGTTTTCCA + >>>>>>>>>>>>>>>>>>>> + + 61 GTAGAAACGGTCCTCCAAGCGTnCCTGAAGTGCTcATTCATTACCGCAAGGTG + <<<<<<<<<<<<<<<<<<<< + +> Contig131_chr28_6481806_6483783 138 C T 0.387007 + + 1 AGaCCCTCGAAATTCTCCAGTTGTCAAATTCTTCCCCAGTnTCTGCTTGAgAGATTTTCT + >>>>>>>>>>>>>>>>>>>> + + 61 CCTAGCTTCAGAGCCTTAACTACAGAATACTGAGTCTTTGCTCAAGCAGCGGCTCAACAC + + +121 ATAACCCCTAAGCTGCCAAGGCTTTTCTCCCCCAAGACTTTGTTTCCTTCCACGAAACCT + <<<<<<<<<<<<<<<<<<<< + +181 TC + + +> Contig60_chr28_30197166_30197364 92 T C 1.139483 + + 1 AATCAGAAAGTCCCAGAGGTGGAGACTACAGCAAATTACCTGACATTTGTCTTTGATGCT + >>>>>>>>>>>>>>>>>>>> + + 61 nTATGTAAAAACTCTGGGTGGCAGGAAAGCACTTAAATTTACCTTGTAGAGCTTTGCTAC + + +121 CCAATAGAACATTCTGTGCTGATGGGAATG + <<<<<<<<<<<<<<<<<<<< + +> Contig29_chr29_4726399_4727143 559 A T 3.113735 + + 1 CTTTTTGTGGCCAAAAgTGACAACATAATTTTCAAAATGGGAAACGATGATTACAAATGA + 
>>>>>>>>>>>>>>>>>>>> + + 61 GTGGAACACATGTTACAGTGGCAAGATGTGTGAGCAATGCTGATTCAGGGTATAATGGGT + + +121 TGGTTGTAAAACAAATATGAGTTTCTAATATTCGGGCATATTAAACAATCTAAGTTnTAC + + +181 AAAATCTCTCTTGTACTATTTATTGGGTAACTACTAGTAAAGGAAAGGCCTAATAGGCTG + <<<< + +241 TTCCCATAAAAAGAAGCTAC + <<<<<<<<<<<<<<<<< + +> Contig1_chr30_5992217_5993068 106 C T 1.078937 + + 1 CTTGAGACAGCCATGGTGTTTGTTTCTACCTTTCCTCTAAGAAGACACCTGTATACAGAT + >>>>>>>>>>>>>>>>>>>>> + + 61 ATTCCnTGTGACTCACACTCATCCTCATAGACATCCCCAGTATCATTTCTGTGAAGCCTT + <<<<< + +121 CCTTGACATTTTCCAACA + <<<<<<<<<<<<<<< + +> Contig165_chr30_25804389_25804926 190 T C 0.328844 + + 1 CCGCTTGTCCCGCTCTGTGATTTAGATGTTTCACGAGCGGGAAGGTGGGGGGATTGATTC + >>>>>>>>>>>>>>>>>>>> + + 61 TCTnATTCGCGCTTCTCCGCCCAGGCTGCGCATTAGAATCACTTGGGGAGCTTTAAAACA + <<<<<<<<<<<<<<<<< + +121 TGCCAG + <<< + +> Contig38_chr31_5164423_5166573 2074 C T + +> Contig17_chr31_26433828_26434459 498 T C 4.814134 + + 1 CCATGCAATCTCATGCAATGGTTAAAAGCAATGAATTTGTTGTACTCAAAATACCTTGGT + >>>>>>>>>>>>>>>>>>>> + + 61 TGGGTATTAAAAAGTTTTnAGTAAACATAATGAAAAAAATAGAAGTAGATAAGATCAACA + <<<<<<<<<< + +121 CACCTACACGAATTA + <<<<<<<<<<<< + +> Contig9_chr32_19479532_19479735 12 A G + +> Contig30_chr32_25902721_25905783 208 C G 0.322381 + AluI,HindIII + 1 TTCACAGTGTTCTCCCAAGGCACAAATAGAATGCTCAGTCATTGGTTATTTTACTTAGAT + >>>>>>>>>>>>>>>>>>>> + + 61 ATTCCTTTCTAAATATAGACTTACCTGTTATTTTTTCCTATACTAATAACATTCAAATTA + + +121 TCTATGTGTACAATAATAAACACTAGGCATAACTGTATCTCAGTACCAATTTCCTTAGAA + + +181 GGTAAAnCTTATTTCAGTCAAGGTCTAGGCCAAGCATTGA + <<<<<<<<<<<<<<<<<<<< + +> Contig18_chr33_22207246_22209159 1363 G T 2.559961 + + 1 ACGACTGCCCTTTTTCCCTCTGTCTCTATTTCTCCTACACACACACACACACACACACAC + >>>>>>>>>>>>>>>>>>>> + + 61 ACACAgAGTGAGCTTTTAGCAACCTTGTTTAACATTTGGAAAGGAATAGCTGACACAACA + + +121 GAGGGGGGnATAAGTAAATACATTGCATGGCTGTATATAATTGAACATTCTTCAAATTCT + + +181 TTAAACAGAAATTTCAGTACCATGGAGATCCTTGAAAT + <<<<<<<<<<<<<<<<<<<<<< + +> Contig170_chr33_26189421_26189940 292 T C 0.307330 + + 1 
TTCAAGTTCCCCTTTTGTGCCTTCACACACTTGTnTTTATGGTCTCTATTTAAAAAAAAg + >>>>>>>>>>>>>>>>>>>> + + 61 AGACAAAAACCTCTTAATAAATTTAGGAAGTAGTCCTCACTCTTTAAAGGAATTGTGCTT + + +121 AAAGCAGCAGCTCTTCCTCACTCCTTG + <<<<<<<<<<<<<<<<<<<< + +> Contig113_chr34_13341080_13341643 236 C T 0.412222 + Hsp92II,NcoI,StyI + 1 AAATGCTCATTTCCCAACATTCAGTGAAATGCCTGATGACTAATCCTTGCTCCaTGGATC + >>>>>>>>>>>>>>>>>>>> + + 61 CTGGGGGTCCCGTGCACAGAAGAGGGTGAGGTCTCTGCCAnGGACTGTGTCCCTGGAAAT + <<<<<<<<<<<<<< + +121 GACAGGGCA + <<<<<< + +> Contig152_chr34_31794848_31795540 242 G A 2.779642 + + 1 ATAGGAAATAAAACCCCAGCTCTCAGAGnAAAGCAAAATACTTTTAAAAAGATGAAAAAG + >>>>>>>>>>>>>>>>>>>> + + 61 CATCAGAGCTATGAGACACAGAAGATCTAGAGTATAATTGTGTTTTTGTATAGAAGGGAG + + +121 AGAAGGAATGCTGCAGGAGCCACATTTCTCCATCTA + <<<<<<<<<<<<<<<<<<<< + +> Contig47_chr35_3666773_3667898 348 G T 0.234571 + + 1 GCTGCCTCAGCAGTTATCTTGGGTTCTGTTAACTTTGACACACCTTTCACGAAGAAATTC + >>>>>>>>>>>>>>>>>>>> + + 61 TTCATTGCAGTGCTTGAACAATCTGATTGTTCAATCTGATTnGATTCTATTTCTTGCTGA + + +121 GATAATGTTCTAGCACCTTCTCTGTGGATCCCCTTAT + <<<<<<<<<<<<<<<<<<<< + +> Contig74_chr35_25394343_25394813 303 A T 4.297720 + + 1 AGTTTCCCCAAATGTTCATGATTAACCAGGTAAACTGAAGATTAACCTTAAATATATATT + >>>>>>>>>>>>>>>>>>>> + + 61 TCTTTGAGTCATTATAATTAAATTAACTAGGTTGTTTTCAAATATACTAATAATAGAAAC + + +121 TGAAAAAATAATCCAAGTAATATaTCTGAATTGAAAAAAAAAGTAAGGCCATTGTATAAA + + +181 ACAACTGAAAGTTTTTGGAnAAGGTACTATTTTTAATTTACAGTGCATTTTTTTAATCGG + + +241 CATTTCAAATAATAACTTCAATCaCACACACAAAAATAAACCAAATCAACTGCATGTAAG + <<<<<<<<<<<<<<<<<<< + +301 GGaAGT + <<< + +> Contig5_chr36_4562983_4563634 343 C T 1.168507 + + 1 ATATGAATGGTGGTGATGGATTCAGCATCTTGACTCTTTTTCAACTATGTCAAGATTTGC + >>>>>>>>>>>>>>>>>>>> + + 61 ACTGGATCTTGTCTAAAGTCACTCTTCTAGGGGAAGTCAAAGAGACTGGGTCaGTCCtCA + + +121 AGATAcGATGTAAGCAGGTAAGATAGCACTATAGTAGGTCTTCTTGTCATGGTGAGTCAA + + +181 TAACCATTCAATATTCTTTCnACCTACTCTTTACCTGCTCAATCAAGGTAGGGGTC + <<<<<<<<<<<<<<<<<<<< + +> Contig133_chr36_32954045_32955409 136 A G 3.772017 + TaqI + 1 
ATTAAATGAAAACAGTGTCAGGCAATAAGATGTATTAAGTACAGTATGCCTGAGGATATA + >>>>>>>>>>>>>>>>>>>>>> + + 61 ATATTAAACACAGATTCTGCTGTTACTATCnAAGTGGATATTAAAATAACAGTGCTACTT + + +121 TGAGGGTAATGCTACTTTGGAGAATATTTTCTAATAAGCTCACCaTAAAATGACggATAA + <<<<<<<<<<<<<<<<<<<< + +> Contig53_chr37_6665763_6665919 116 C T 10.874746 + BstOI + 1 AGTCCTCATGTTGTACTTTACCTCACCTGAATTTACTCATCtGATAGTTGGAAATTTGTA + >>>>>>>>>>>>>>>>>>>>>>>> + + 61 TCCATTGCCCATCtTCACCACCCCATGTCnCTGGAAACCAACAAtCTGTTCTCTGTATGa + <<<<<<<<<<<<<<<<<<<<<<<<< + +121 CTT + + +> Contig2_chr37_31197993_31198256 182 C T 0.594606 + + 1 CTCTCACCACATGGAGAATCCTGTATGTTCAGCTGTATGACGTGGGGGGAACGTCAGAGC + >>>>>>>>>>>>>>>>>>>> + + 61 TCAGTTTCATAGCAGTCAGCTCCATGTTATGGGTTCAAgAnGAAAACAGGTGGCAGGCtT + + +121 GCCACAGCCTCCCTCAGGGGTGgCCTTGACAGATAAAcGT + <<<<<<<<<<<<<<<<<<<< + +> Contig7_chr38_12217200_12218387 1163 A T + +> Contig265_chrX_2689247_2689484 114 C G 9.232233 + + 1 CTTAGAGAATTCCCTGATTCACTGAGTTAAATTATTACCAAATCTGATAATAATAAAAGA + >>>>>>>>>>>>>>>>>>>>>>> + + 61 AGTAATTACAGATCAATAATTAATCTATATGTCTGAATACATTTTAATAAGTCCnAcTCA + + +121 ACAATATGCTGACAAAACAATACATCTTGTCT + <<<<<<<<<<<<<<<<<<<<<<< + +> Contig113_chrX_26287829_26288398 385 C T 0.077485 + + 1 AAAGCCGTAACAGTCGCTAGGAGAATCATAATTTTAAGCTTTGTGTGTCCCGGGcTTGAG + >>>>>>>>>>>>>>>>>>>> + + 61 TCCCTCAGGAGTAGTTAGATGCGGCCTTAAATTCTCcCAGTAAATTCACnTTGACGGCCT + + +121 ATTTTTGACCTGGGGGCACACGCTGCTATACACTCTAGCCACCTCTGATCCTCTGGCCTC + + +181 CTCTGTTACAATGACAGAAACGACAGAAGCATTTCTTTAAAATAAGTCCCAGTACGTGCA + + +241 CACAAACGTTCAGGGCAGCCTTCTCCATAAACGGCACGAAATGGC + <<<<<<<<<<<<<<<<<<<< + +> Contig90_chrX_57430715_57431566 548 C T 0.153995 + EcoRV + 1 CTCATTCCCAGCTACCTCCACCTCTATACCAACCCCTAGTTCCTGTACATCCCTGCTTCT + >>>>>>>>>>>>>>>>>>>> + + 61 ATAGGAAATCTTCCTGGTGTTGATATnATTCCCAAGGTCAGGCTGTCCTCCTAGCTCCCT + + +121 CTCCTCATCTGCATCAAGTCCTCCAAACTGGGCAGTAGAC + <<<<<<<<<<<<<<<<<<<< + +> Contig133_chrX_84833782_84834125 182 G A 0.277794 + + 1 CACCAGAGTGCAATCGAGAACCATCTGATCACAGAACCATAGAAAAGATTGCTGTACAAG + >>>>>>>>>>>>>>>>>>>> + + 61 
ACTTAGGAACTCATTCTGTTCAGGATGGAGAAGCTGATGCCCAAAAAGGGAAAGGAACTT + + +121 AACCAAAGTCCATACAnTATCAACTCTACACATAAAGGAAGGGAGTGGAGGGAGCAGTAA + + +181 GACCAGAGATATAGACCCCAGTGAGGAGGCTGTGAGCTCCTG + <<<<<<<<<<<<<<<<<<<< +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/find_intervals/find_intervals.interval Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,1 @@ +chr2 9817960 67331624 1272.2000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/map_ensembl_transcripts/map_ensembl_transcripts.tabular Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,150 @@ +ENSCAFT00000000001 476153 cfa00230=Purine metabolism.cfa00500=Starch and sucrose metabolism.cfa00740=Riboflavin metabolism.cfa00760=Nicotinate and nicotinamide metabolism.cfa00770=Pantothenate and CoA biosynthesis.cfa01100=Metabolic pathways +ENSCAFT00000000144 483960 N +ENSCAFT00000000160 610160 N +ENSCAFT00000000215 U N +ENSCAFT00000000233 483973 N +ENSCAFT00000000365 474414 cfa00450=Selenocompound metabolism.cfa00970=Aminoacyl-tRNA biosynthesis +ENSCAFT00000000507 484023 N +ENSCAFT00000000517 476233 N +ENSCAFT00000000674 611986 N +ENSCAFT00000000724 609478 N +ENSCAFT00000000760 U N +ENSCAFT00000000762 U N +ENSCAFT00000001047 475067 cfa00240=Pyrimidine metabolism.cfa00410=beta-Alanine metabolism.cfa00770=Pantothenate and CoA biosynthesis.cfa00983=Drug metabolism - other enzymes.cfa01100=Metabolic pathways +ENSCAFT00000001052 U N +ENSCAFT00000001063 481999 N +ENSCAFT00000001076 U N +ENSCAFT00000001104 607591 N +ENSCAFT00000001141 484064 N +ENSCAFT00000001146 475076 N +ENSCAFT00000001204 481203 N +ENSCAFT00000001219 474465 N +ENSCAFT00000001250 481729.481731 cfa04145=Phagosome.cfa04514=Cell adhesion molecules (CAMs).cfa04612=Antigen processing and presentation.cfa04672=Intestinal immune network for IgA production.cfa04940=Type I diabetes mellitus.cfa05140=Leishmaniasis.cfa05145=Toxoplasmosis.cfa05150=Staphylococcus aureus infection.cfa05152=Tuberculosis.cfa05164=Influenza A.cfa05166=HTLV-I infection.cfa05168=Herpes simplex infection.cfa05310=Asthma.cfa05320=Autoimmune thyroid disease.cfa05322=Systemic lupus erythematosus.cfa05323=Rheumatoid arthritis.cfa05330=Allograft rejection.cfa05332=Graft-versus-host disease.cfa05416=Viral myocarditis +ENSCAFT00000001352 482026 cfa00565=Ether lipid metabolism +ENSCAFT00000001363 475084 cfa03022=Basal transcription factors +ENSCAFT00000001421 484096 N 
+ENSCAFT00000001523 475088 N +ENSCAFT00000001575 481744 cfa04141=Protein processing in endoplasmic reticulum +ENSCAFT00000001587 482035 N +ENSCAFT00000001597 609411 N +ENSCAFT00000002056 610014 N +ENSCAFT00000002100 U N +ENSCAFT00000002110 481249 N +ENSCAFT00000002175 476310 N +ENSCAFT00000002259 484151 N +ENSCAFT00000002460 481785 N +ENSCAFT00000002537 U N +ENSCAFT00000002577 484157 N +ENSCAFT00000002578 608906 N +ENSCAFT00000002660 U N +ENSCAFT00000002792 474523 N +ENSCAFT00000002849 475216 N +ENSCAFT00000002999 U N +ENSCAFT00000003163 474921 cfa03040=Spliceosome +ENSCAFT00000003223 474925 N +ENSCAFT00000003307 609995 N +ENSCAFT00000003515 482316 N +ENSCAFT00000003560 U N +ENSCAFT00000003644 484216 cfa00970=Aminoacyl-tRNA biosynthesis +ENSCAFT00000003824 475249 N +ENSCAFT00000003840 482333 N +ENSCAFT00000004092 474960 N +ENSCAFT00000004103 484298 N +ENSCAFT00000004208 481637 N +ENSCAFT00000004253 100534006.100534007.474588 N +ENSCAFT00000004311 482346 N +ENSCAFT00000004464 481892 N +ENSCAFT00000004511 481893 N +ENSCAFT00000004609 611755 N +ENSCAFT00000004673 611817 N +ENSCAFT00000004726 610047 cfa00230=Purine metabolism.cfa00240=Pyrimidine metabolism.cfa01100=Metabolic pathways.cfa03030=DNA replication.cfa03410=Base excision repair.cfa03420=Nucleotide excision repair.cfa03430=Mismatch repair.cfa03440=Homologous recombination.cfa05166=HTLV-I infection +ENSCAFT00000004799 U N +ENSCAFT00000004933 482382 cfa04621=NOD-like receptor signaling pathway.cfa05133=Pertussis +ENSCAFT00000004993 474995 cfa03008=Ribosome biogenesis in eukaryotes +ENSCAFT00000005126 U N +ENSCAFT00000005142 606804 N +ENSCAFT00000005225 475647 N +ENSCAFT00000005323 U N +ENSCAFT00000005467 U N +ENSCAFT00000005496 481925 N +ENSCAFT00000005518 492302 cfa02010=ABC transporters.cfa04971=Gastric acid secretion.cfa04972=Pancreatic secretion.cfa04976=Bile secretion +ENSCAFT00000005653 403417 cfa04145=Phagosome.cfa04620=Toll-like receptor signaling pathway.cfa05132=Salmonella 
infection.cfa05133=Pertussis.cfa05134=Legionellosis.cfa05140=Leishmaniasis.cfa05142=Chagas disease (American trypanosomiasis).cfa05144=Malaria.cfa05145=Toxoplasmosis.cfa05146=Amoebiasis.cfa05152=Tuberculosis.cfa05162=Measles.cfa05164=Influenza A.cfa05323=Rheumatoid arthritis +ENSCAFT00000005746 476410 cfa00071=Fatty acid metabolism.cfa03320=PPAR signaling pathway.cfa04920=Adipocytokine signaling pathway +ENSCAFT00000005749 610007 N +ENSCAFT00000005832 403584 cfa04060=Cytokine-cytokine receptor interaction.cfa04630=Jak-STAT signaling pathway.cfa04672=Intestinal immune network for IgA production.cfa05166=HTLV-I infection.cfa05168=Herpes simplex infection.cfa05323=Rheumatoid arthritis +ENSCAFT00000005972 475012 N +ENSCAFT00000006025 482980 N +ENSCAFT00000006114 483829 N +ENSCAFT00000006157 475021 N +ENSCAFT00000006219 483261 cfa04972=Pancreatic secretion.cfa04978=Mineral absorption +ENSCAFT00000006272 484394 cfa00280=Valine, leucine and isoleucine degradation.cfa00290=Valine, leucine and isoleucine biosynthesis.cfa00770=Pantothenate and CoA biosynthesis.cfa01100=Metabolic pathways +ENSCAFT00000006453 475893 N +ENSCAFT00000006479 U N +ENSCAFT00000006507 484622 cfa03030=DNA replication.cfa04110=Cell cycle +ENSCAFT00000006669 476094 N +ENSCAFT00000006689 475897 N +ENSCAFT00000006827 U N +ENSCAFT00000006891 610021 N +ENSCAFT00000007130 485445 cfa04020=Calcium signaling pathway.cfa04080=Neuroactive ligand-receptor interaction +ENSCAFT00000007145 607961 N +ENSCAFT00000007244 476781 N +ENSCAFT00000007375 403767 cfa04977=Vitamin digestion and absorption +ENSCAFT00000007440 482516 N +ENSCAFT00000007467 485576 N +ENSCAFT00000007484 609336 N +ENSCAFT00000007527 607108 N +ENSCAFT00000007553 487123 cfa03450=Non-homologous end-joining.cfa05340=Primary immunodeficiency +ENSCAFT00000007697 475382 N +ENSCAFT00000007703 477019 cfa03430=Mismatch repair.cfa03460=Fanconi anemia pathway.cfa05200=Pathways in cancer.cfa05210=Colorectal cancer.cfa05213=Endometrial cancer +ENSCAFT00000007747 U 
N +ENSCAFT00000007774 477021 cfa04510=Focal adhesion.cfa04512=ECM-receptor interaction.cfa04514=Cell adhesion molecules (CAMs).cfa04810=Regulation of actin cytoskeleton.cfa05410=Hypertrophic cardiomyopathy (HCM).cfa05412=Arrhythmogenic right ventricular cardiomyopathy (ARVC).cfa05414=Dilated cardiomyopathy +ENSCAFT00000007776 U N +ENSCAFT00000007779 478007.478008 cfa03060=Protein export.cfa04141=Protein processing in endoplasmic reticulum.cfa04145=Phagosome +ENSCAFT00000007859 483010 N +ENSCAFT00000007951 U N +ENSCAFT00000007959 482810.611087 N +ENSCAFT00000008012 485173 N +ENSCAFT00000008063 484489 N +ENSCAFT00000008142 476128 N +ENSCAFT00000008198 612489 N +ENSCAFT00000008413 U N +ENSCAFT00000008540 483021 N +ENSCAFT00000008586 484499 N +ENSCAFT00000008588 U N +ENSCAFT00000008673 478018 N +ENSCAFT00000008678 485188 N +ENSCAFT00000008728 U N +ENSCAFT00000008769 485523 cfa02010=ABC transporters.cfa04976=Bile secretion +ENSCAFT00000008831 475398 N +ENSCAFT00000009074 485769 cfa04330=Notch signaling pathway +ENSCAFT00000009114 483354 N +ENSCAFT00000009614 475416 N +ENSCAFT00000009698 486001 N +ENSCAFT00000009710 486002 N +ENSCAFT00000010094 486223 cfa00230=Purine metabolism.cfa00240=Pyrimidine metabolism.cfa01100=Metabolic pathways.cfa03030=DNA replication.cfa03410=Base excision repair.cfa03420=Nucleotide excision repair.cfa05166=HTLV-I infection +ENSCAFT00000010141 482857 cfa04360=Axon guidance +ENSCAFT00000010439 610992 N +ENSCAFT00000010496 415126 cfa04380=Osteoclast differentiation.cfa04916=Melanogenesis.cfa05200=Pathways in cancer.cfa05218=Melanoma +ENSCAFT00000010516 U N +ENSCAFT00000010531 484693 N +ENSCAFT00000010559 483405 N +ENSCAFT00000010593 U N +ENSCAFT00000010616 474176 cfa03450=Non-homologous end-joining.cfa04110=Cell cycle +ENSCAFT00000010630 486770 N +ENSCAFT00000010829 486944 N +ENSCAFT00000010865 U N +ENSCAFT00000010931 485368 N +ENSCAFT00000010977 U N +ENSCAFT00000010988 482891 cfa04145=Phagosome +ENSCAFT00000011187 475441 N +ENSCAFT00000011380 U 
N +ENSCAFT00000011397 475750 cfa04110=Cell cycle.cfa04114=Oocyte meiosis.cfa04120=Ubiquitin mediated proteolysis.cfa04914=Progesterone-mediated oocyte maturation.cfa05166=HTLV-I infection +ENSCAFT00000011721 475621 N +ENSCAFT00000011730 486534 N +ENSCAFT00000011771 477193 N +ENSCAFT00000011789 609978 N +ENSCAFT00000011968 488881 cfa00760=Nicotinate and nicotinamide metabolism.cfa04146=Peroxisome +ENSCAFT00000012081 478082 cfa04621=NOD-like receptor signaling pathway +ENSCAFT00000012133 611998 N +ENSCAFT00000012159 484609 N +ENSCAFT00000012254 U N
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/modify_snp_table/modify.wsf Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,338 @@ +Contig161_chr1_4641264_4641879 115 C T 73.5 chr1 4641382 C 6 0 2 45 8 0 2 51 15 0 2 72 5 0 2 42 6 0 2 45 10 0 2 57 Y 54 0.323 0 +Contig20_chr1_21313469_21313570 66 C T 54.0 chr1 21313534 C 4 0 2 39 4 0 2 39 5 0 2 42 4 0 2 39 4 0 2 39 5 0 2 42 N 1 +99. 0 +Contig86_chr1_30984450_30985684 670 C T 365.0 chr1 30985133 C 9 0 2 54 10 0 2 57 13 0 2 66 3 0 2 36 9 0 2 54 7 0 2 48 Y 145 0.031 0 +Contig5_chr1_32562160_32563940 1215 G T 163.0 chr1 32563356 G 17 0 2 78 19 0 2 84 20 0 2 87 14 0 2 69 12 0 2 63 10 0 2 57 Y 17 0.251 0 +Contig110_chr1_33385093_33386888 510 C T 270.0 chr1 33385587 A 14 0 2 69 11 0 2 60 19 0 2 84 11 0 2 60 10 0 2 57 13 0 2 66 Y 13 0.126 0 +Contig100_chr1_33562920_33564288 743 C T 178.0 chr1 33563655 C 6 0 2 45 10 0 2 57 8 0 2 51 5 0 2 42 13 0 2 66 7 0 2 48 Y 13 0.090 3 +Contig7_chr1_37302355_37302489 97 A G 59.2 chr1 37302452 G 3 0 2 36 8 0 2 51 5 0 2 42 8 0 2 51 7 0 2 48 6 0 2 45 N 56 2.812 0 +Contig62_chr1_41880715_41882180 1078 T G 57.6 chr1 41881785 T 14 0 2 69 15 0 2 72 16 0 2 75 13 0 2 66 8 0 2 51 10 0 2 57 Y 21 0.477 0 +Contig47_chr1_48409178_48409384 37 C T 134.0 chr1 48409215 T 5 0 2 42 6 0 2 45 8 0 2 51 9 0 2 54 4 0 2 39 6 0 2 45 N 66 +99. 
0 +Contig119_chr1_49647683_49650077 1618 C A 99.7 chr1 49649276 A 8 0 2 51 11 0 2 60 10 0 2 57 9 0 2 54 10 0 2 57 14 0 2 69 Y 16 0.166 0 +Contig21_chr1_60697952_60699446 307 G A 51.9 chr1 60698265 G 12 0 2 63 9 0 2 54 4 0 2 39 6 0 2 45 9 0 2 54 4 0 2 39 Y 98 0.507 0 +Contig131_chr1_62319542_62320564 169 C G 103.0 chr1 62319709 C 12 0 2 63 12 0 2 66 14 0 2 69 12 0 2 63 9 0 2 54 9 0 2 54 Y 73 0.307 1 +Contig14_chr1_63450425_63450680 101 T A 102.0 chr1 63450530 T 8 0 2 51 10 0 2 57 18 0 2 81 8 0 2 51 8 0 2 34 8 0 2 51 N 99 1.085 0 +Contig83_chr1_63869778_63869942 40 T C 23.7 chr1 63869819 C 5 0 2 42 7 0 2 48 2 0 2 33 4 0 2 39 6 0 2 48 4 0 2 39 N 654 1.364 0 +Contig30_chr1_64702572_64703138 178 A T 117.0 chr1 64702750 T 10 0 2 57 10 0 2 57 20 0 2 87 21 0 2 90 6 0 2 45 12 0 2 63 Y 50 3.872 0 +Contig101_chr1_69868406_69868872 287 G A 14.6 chr1 69868689 G 13 0 2 66 17 0 2 78 10 0 2 57 8 0 2 51 7 0 2 48 8 0 2 51 N 137 0.305 0 +Contig35_chr1_74482577_74482791 170 G A 45.4 chr1 74482751 A 3 0 2 36 4 0 2 39 13 0 2 66 2 0 2 33 5 0 2 42 2 0 2 33 N 20 +99. 3 +Contig49_chr1_83865731_83865944 85 G A 34.1 chr1 -1 N 4 0 2 39 4 0 2 39 8 0 2 51 2 0 2 33 5 0 2 42 4 0 2 39 N -1 1.485 0 +Contig129_chr1_117547123_117548666 926 G A 126.0 chr1 117548059 G 19 0 2 84 9 0 2 54 11 0 2 60 10 0 2 57 12 0 2 63 11 0 2 60 Y 64 0.049 0 +Contig7_chr1_125154638_125154844 190 G T 130.0 chr1 125154818 A 5 0 2 42 4 0 2 39 7 0 2 48 2 0 2 33 7 0 2 48 4 0 2 39 N 33 +99. 
0 +Contig222_chr2_9817738_9818143 220 C T 888.0 chr2 9817960 C 17 0 2 78 12 0 2 63 20 0 2 87 8 0 2 51 11 0 2 60 12 0 2 63 Y 76 0.093 1 +Contig47_chr2_25470778_25471576 126 G A 888.0 chr2 25470896 G 12 0 2 63 14 0 2 69 14 0 2 69 10 0 2 57 18 0 2 81 13 0 2 66 N 11 0.289 1 +Contig10_chr2_40859744_40860534 637 G A 888.0 chr2 40860397 A 3 0 2 36 3 0 2 36 2 0 2 33 7 0 2 48 6 0 2 45 8 0 2 51 Y 42 1.435 0 +Contig52_chr2_41421981_41422725 604 C A 888.0 chr2 41422583 A 17 0 2 78 18 0 2 81 14 0 2 69 17 0 2 78 12 0 2 63 14 0 2 69 Y 44 0.882 0 +Contig94_chr2_43869105_43870358 220 G A 888.0 chr2 43869333 G 12 0 2 63 18 0 2 81 11 0 2 60 15 0 2 72 12 0 2 63 13 0 2 66 Y 1 0.156 0 +Contig34_chr2_48444129_48444939 695 C T 134.0 chr2 48444828 C 14 0 2 69 8 0 2 51 16 0 2 75 17 0 2 78 9 0 2 54 15 0 2 72 Y 161 0.375 0 +Contig6_chr2_56859179_56859956 671 T C 999.9 chr2 56859851 T 15 0 2 72 18 0 2 81 20 0 2 90 19 0 2 84 19 0 2 84 24 0 2 99 N 28 5.308 1 +Contig115_chr2_61631913_61632510 310 G T 999.3 chr2 61632216 G 7 0 2 48 9 0 2 54 7 0 2 48 11 0 2 60 10 0 2 57 10 0 2 57 N 13 0.184 0 +Contig31_chr2_67331584_67331785 39 C T 999.0 chr2 67331623 C 11 0 2 60 10 0 2 57 7 0 2 48 9 0 2 54 2 0 2 33 4 0 2 39 N 110 0.647 1 +Contig92_chr2_75906683_75907774 773 T C 85.4 chr2 75907438 C 12 0 2 63 12 0 2 63 17 0 2 78 8 0 2 51 8 0 2 51 13 0 2 66 Y 93 0.166 0 +Contig163_chr2_76402959_76404830 221 C T 127.0 chr2 76403181 C 4 0 2 42 10 0 2 57 9 0 2 54 11 0 2 60 7 0 2 48 9 0 2 54 Y 54 0.178 1 +Contig59_chr2_85243022_85243758 506 G A 96.3 chr2 85243509 T 9 0 2 54 11 0 2 60 12 0 2 63 14 0 2 69 10 0 2 57 7 0 2 48 Y 6 0.459 0 +Contig56_chr3_17326225_17327548 387 G C 91.2 chr3 17326591 G 14 0 2 69 13 0 2 66 15 0 2 72 15 0 2 72 13 0 2 66 12 0 2 63 Y 20 0.225 3 +Contig108_chr3_46210055_46210874 367 A G 21.0 chr3 46210423 A 19 0 2 84 10 0 2 57 16 0 2 75 14 0 2 69 20 0 2 87 11 0 2 60 N 236 0.028 1 +Contig16_chr3_47113407_47114449 322 G A 105.0 chr3 47113713 G 13 0 2 66 17 0 2 78 15 0 2 72 6 0 2 45 11 0 2 60 11 0 2 60 
Y 114 0.132 5 +Contig3_chr3_47564810_47565251 262 T G 112.0 chr3 47565104 T 14 0 2 69 16 0 2 75 20 0 2 87 10 0 2 57 9 0 2 54 8 0 2 51 Y 24 0.073 1 +Contig35_chr3_49662401_49662929 270 A T 96.1 chr3 49662652 A 14 0 2 69 11 0 2 60 23 0 2 96 13 0 2 66 12 0 2 63 11 0 2 60 Y 36 3.583 2 +Contig97_chr3_49820354_49821631 1069 G A 44.1 chr3 49821402 G 9 0 2 54 9 0 2 54 6 0 2 45 10 0 2 57 5 0 2 42 8 0 2 51 N 6 0.201 2 +Contig25_chr3_53260697_53262560 402 G A 211.0 chr3 53261095 G 17 0 2 78 14 0 2 69 15 0 2 75 12 0 2 63 14 0 2 69 12 0 2 63 Y 116 1.033 0 +Contig11_chr3_53992739_53995954 2392 G A 82.4 chr3 53995143 A 12 0 2 66 11 0 2 60 14 0 2 69 6 0 2 45 11 0 2 60 17 0 2 78 Y 358 0.321 1 +Contig236_chr3_72676275_72676473 128 G A 278.0 chr3 72676410 G 12 0 2 63 11 0 2 60 13 0 2 66 10 0 2 57 11 0 2 60 8 0 2 51 N 36 0.496 1 +Contig48_chr3_74792236_74792388 63 T C 111.0 chr3 74792289 - 17 0 2 78 9 0 2 54 9 0 2 54 5 0 2 42 11 0 2 60 9 0 2 54 N -1 3.528 0 +Contig65_chr3_80727952_80728283 39 T C 71.2 chr3 80727990 T 7 0 2 48 3 0 2 36 8 0 2 51 6 0 2 45 8 0 2 51 11 0 2 60 N 22 7.078 0 +Contig53_chr3_86407941_86409349 1406 G A 86.9 chr3 86409317 A 5 0 2 42 5 0 2 42 4 0 2 39 10 0 2 57 8 0 2 51 12 0 2 63 N 14 3.285 1 +Contig13_chr3_92409738_92412300 718 A G 23.3 chr3 92410450 A 12 0 2 63 16 0 2 75 18 0 2 81 13 0 2 66 22 0 2 93 7 0 2 48 Y 23 0.224 2 +Contig134_chr4_12145648_12148225 1326 C T 164.0 chr4 12146961 C 9 0 2 54 8 0 2 51 7 0 2 48 3 0 2 36 5 0 2 42 5 0 2 42 Y 4 0.080 1 +Contig88_chr4_15557471_15557833 268 A G 145.0 chr4 15557737 A 6 0 2 45 6 0 2 45 11 0 2 60 9 0 2 54 5 0 2 42 6 0 2 45 Y 46 4.138 0 +Contig53_chr4_18823968_18824478 149 A G 91.3 chr4 18824115 A 18 0 2 81 15 0 2 72 21 0 2 90 13 0 2 66 9 0 2 54 12 0 2 63 N 51 0.251 0 +Contig19_chr4_26233601_26233991 146 G C 51.6 chr4 26233744 G 10 0 2 57 8 0 2 51 9 0 2 54 5 0 2 42 9 0 2 54 4 0 2 39 N 41 0.163 3 +Contig78_chr4_28579975_28580134 30 T G 19.6 chr4 28579994 - 4 0 2 39 3 0 2 36 5 0 2 42 4 0 2 39 2 0 2 33 2 0 2 33 N 33 0.499 
0 +Contig16_chr4_30177226_30179725 621 C T 88.4 chr4 30177859 C 20 0 2 87 13 0 2 66 13 0 2 66 11 0 2 60 8 0 2 51 8 0 2 51 Y 45 0.797 1 +Contig30_chr4_46196500_46197672 1045 A C 33.4 chr4 46197522 C 16 0 2 75 9 0 2 54 4 0 2 39 7 0 2 48 14 0 2 69 6 0 2 45 Y 43 0.306 0 +Contig2_chr4_47039007_47039323 158 G C 35.1 chr4 47039160 - 8 0 2 51 9 0 2 54 13 0 2 66 8 0 2 51 10 0 2 60 9 0 2 54 N 0 0.131 0 +Contig17_chr4_61310346_61311158 267 C T 49.9 chr4 61310604 T 10 0 2 57 7 0 2 48 9 0 2 54 10 0 2 57 14 0 2 69 7 0 2 48 Y 219 0.098 0 +Contig26_chr4_64190783_64191295 64 A G 162.0 chr4 64190843 A 10 0 2 57 6 0 2 45 20 0 2 87 12 0 2 63 17 0 2 78 7 0 2 48 Y 306 7.428 0 +Contig11_chr4_65500960_65501654 634 T C 107.0 chr4 65501585 T 13 0 2 66 14 0 2 69 13 0 2 66 13 0 2 66 6 0 2 45 18 0 2 81 Y 10 6.849 0 +Contig38_chr4_67768488_67768982 113 A G 102.0 chr4 67768598 A 9 0 2 54 8 0 2 51 9 0 2 54 11 0 2 60 10 0 2 57 7 0 2 48 Y 188 3.175 0 +Contig30_chr4_70978564_70979580 596 A G 164.0 chr4 70979151 A 15 0 2 72 12 0 2 63 20 0 2 87 14 0 2 69 15 0 2 72 15 0 2 72 Y 111 2.458 2 +Contig72_chr4_74225793_74226492 674 A G 110.0 chr4 74226472 A 5 0 2 42 3 0 2 36 2 0 2 33 3 0 2 36 7 0 2 48 4 0 2 39 Y 115 +99. 
1 +Contig32_chr4_75618955_75620254 301 T C 333.0 chr4 75619257 C 10 0 2 57 8 0 2 51 12 0 2 63 20 0 2 87 12 0 2 63 14 0 2 69 Y 34 0.163 2 +Contig31_chr5_4734956_4736547 1166 C T 133.0 chr5 4736132 C 14 0 2 69 8 0 2 51 17 0 2 78 4 0 2 39 9 0 2 54 12 0 2 63 Y 1 0.021 0 +Contig30_chr5_15698241_15699076 396 G T 76.6 chr5 15698633 T 8 0 2 51 9 0 2 54 10 0 2 57 7 0 2 48 11 0 2 60 8 0 2 54 Y 65 0.009 0 +Contig36_chr5_17709244_17710004 373 T C 281.0 chr5 17709624 T 6 0 2 45 9 0 2 54 7 0 2 48 4 0 2 39 10 0 2 57 4 0 2 39 Y 16 0.131 0 +Contig13_chr5_21881138_21881562 227 A G 251.0 chr5 21881356 A 11 0 2 60 20 0 2 87 22 0 2 93 10 0 2 57 10 0 2 57 21 0 2 90 Y 182 2.013 0 +Contig5_chr5_23188121_23190168 1841 C T 141.0 chr5 23189975 C 20 0 2 87 19 0 2 84 22 0 2 93 16 0 2 75 18 0 2 81 14 0 2 69 N 45 0.355 0 +Contig6_chr5_26899813_26900498 97 A C 88.6 chr5 26899910 A 15 0 2 72 14 0 2 69 27 0 2 108 15 0 2 72 13 0 2 69 12 0 2 63 Y 92 7.370 3 +Contig314_chr5_34019166_34019319 72 C A 20.1 chr5 -1 N 6 0 2 45 9 0 2 54 4 0 2 39 4 0 2 39 9 0 2 54 5 0 2 42 N -1 +99. 4 +Contig147_chr5_38980258_38980559 221 C T 40.8 chr5 38980477 C 15 0 2 72 15 0 2 72 19 0 2 84 10 0 2 57 12 0 2 63 20 0 2 87 Y 11 4.576 0 +Contig115_chr5_48119079_48120169 151 C T 78.3 chr5 48119234 C 17 0 2 78 10 0 2 57 14 0 2 69 16 0 2 75 8 0 2 51 12 0 2 63 Y 205 0.320 0 +Contig45_chr5_50892738_50892968 169 C A 25.8 chr5 50892911 C 10 0 2 57 7 0 2 48 10 0 2 60 6 0 2 45 6 0 2 45 13 0 2 66 N 244 0.497 1 +Contig40_chr5_51484164_51484696 14 A G 53.3 chr5 51484180 A 6 0 2 45 4 0 2 39 4 0 2 39 3 0 2 36 0 0 2 13 3 0 2 36 N 63 +99. 
1 +Contig40_chr5_51664286_51667573 861 C T 148.0 chr5 51665149 C 20 0 2 87 21 0 2 90 20 0 2 87 11 0 2 60 16 0 2 75 15 0 2 72 Y 207 0.080 1 +Contig15_chr5_51889708_51891244 882 A G 149.0 chr5 51890581 G 13 0 2 66 18 0 2 81 17 0 2 78 22 0 2 93 15 0 2 72 22 0 2 93 Y 7 0.025 1 +Contig143_chr5_57231364_57232010 294 T C 78.5 chr5 57231644 T 3 0 2 36 5 0 2 42 4 0 2 39 2 0 2 33 10 0 2 57 6 0 2 45 Y 73 0.337 2 +Contig13_chr5_57609985_57610584 496 C T 50.5 chr5 57610476 C 17 0 2 78 9 0 2 54 6 0 2 45 8 0 2 51 10 0 2 57 12 0 2 63 N 77 2.022 1 +Contig230_chr5_58486998_58487280 227 T C 192.0 chr5 58487232 T 3 0 2 36 4 0 2 39 9 0 2 54 6 0 2 45 4 0 2 39 7 0 2 48 N 24 0.100 2 +Contig32_chr5_70852360_70853289 282 G A 114.0 chr5 70852623 G 16 0 2 75 11 0 2 60 13 0 2 66 12 0 2 63 13 0 2 66 7 0 2 48 Y 33 0.276 0 +Contig100_chr5_71189678_71190590 813 C T 30.8 chr5 71190523 C 11 0 2 60 11 0 2 60 9 0 2 54 10 0 2 57 6 0 2 45 13 0 2 66 Y 8 0.362 1 +Contig45_chr5_76133561_76134403 388 A G 103.0 chr5 76133941 G 3 0 2 36 8 0 2 51 8 0 2 51 5 0 2 42 6 0 2 45 7 0 2 48 Y 57 0.038 0 +Contig61_chr5_90202541_90204393 909 C T 101.0 chr5 90203461 T 7 0 2 48 5 0 2 42 14 0 2 69 3 0 2 36 5 0 2 42 8 0 2 51 Y 64 1.448 0 +Contig111_chr6_5821219_5822519 1060 A G 68.1 chr6 5822321 T 7 0 2 48 6 0 2 45 11 0 2 60 9 0 2 54 3 0 2 36 12 0 2 63 Y 7 0.231 1 +Contig220_chr6_10671338_10672441 999 T C 36.3 chr6 10672322 T 5 0 2 42 11 0 2 60 11 0 2 60 8 0 2 51 5 0 2 42 9 0 2 54 Y 1 1.667 0 +Contig226_chr6_17361986_17362884 418 G C 251.0 chr6 17362406 G 6 0 2 45 8 0 2 51 7 0 2 48 9 0 2 54 7 0 2 48 7 0 2 48 Y 7 0.147 0 +Contig51_chr6_20231207_20231785 161 A G 70.5 chr6 20231375 G 13 0 2 66 5 0 2 42 8 0 2 51 2 0 2 36 5 0 2 42 5 0 2 42 Y 153 1.754 0 +Contig102_chr6_30271329_30271577 39 T G 139.0 chr6 30271371 G 3 0 2 36 4 0 2 39 6 0 2 45 1 0 2 30 4 0 2 39 4 0 2 39 N 15 1.159 0 +Contig217_chr6_31393824_31394218 97 G A 115.0 chr6 31393921 G 9 0 2 54 19 0 2 84 15 0 2 72 12 0 2 63 7 0 2 48 10 0 2 57 N 45 0.477 0 
+Contig186_chr6_31928098_31928245 73 G A 117.0 chr6 -1 N 5 0 2 42 8 0 2 51 2 0 2 33 4 0 2 39 1 0 2 30 5 0 2 42 N -1 0.276 1 +Contig52_chr6_33188498_33188724 123 G A 59.0 chr6 -1 N 5 0 2 42 13 0 2 66 8 0 2 51 4 0 2 39 9 0 2 54 9 0 2 54 N -1 0.880 1 +Contig102_chr6_38743009_38743435 290 A G 178.0 chr6 38743311 A 11 0 2 60 13 0 2 66 9 0 2 54 11 0 2 60 12 0 2 63 13 0 2 66 Y 34 0.148 4 +Contig81_chr6_49018353_49019532 179 C A 72.5 chr6 49018530 A 15 0 2 72 13 0 2 66 19 0 2 72 8 0 2 51 12 0 2 63 16 0 2 75 Y 15 0.145 1 +Contig112_chr6_51024554_51024851 100 A G 121.0 chr6 51024654 A 10 0 2 57 12 0 2 63 9 0 2 54 13 0 2 66 14 0 2 69 17 0 2 78 N 75 4.287 0 +Contig40_chr6_51412751_51413807 227 T C 94.5 chr6 51412975 C 5 0 2 42 8 0 2 51 7 0 2 48 9 0 2 54 11 0 2 60 10 0 2 57 Y 4 5.661 0 +Contig47_chr6_69073222_69074767 1315 T C 212.0 chr6 69074558 T 20 0 2 87 17 0 2 78 18 0 2 81 12 0 2 63 17 0 2 78 7 0 2 48 Y 9 0.652 0 +Contig30_chr6_74848932_74849059 57 C G 46.3 chr6 74848993 C 7 0 2 48 7 0 2 33 6 0 2 45 7 0 2 48 5 0 2 42 6 0 2 45 N -1 +99. 1 +Contig84_chr7_6648683_6650255 1297 G A 110.0 chr7 6649988 G 18 0 2 81 9 0 2 54 22 0 2 77 16 0 2 75 20 0 2 87 6 0 2 45 Y 83 0.166 0 +Contig239_chr7_13007379_13007700 275 A G 39.8 chr7 13007642 A 8 0 2 51 5 0 2 42 8 0 2 51 3 0 2 36 3 0 2 36 5 0 2 42 N 46 1.511 3 +Contig119_chr7_18310707_18310948 23 A T 133.0 chr7 18310729 A 6 0 2 45 5 0 2 42 10 0 2 57 5 0 2 42 2 0 2 33 2 0 2 33 N 4553 +99. 0 +Contig93_chr7_18513377_18513741 173 T C 130.0 chr7 18513533 C 15 0 2 72 11 0 2 60 18 0 2 81 6 0 2 45 10 0 2 57 14 0 2 69 Y 115 0.174 0 +Contig133_chr7_19603333_19603776 414 C G 31.9 chr7 19603734 G 10 0 2 57 4 0 2 39 4 0 2 39 5 0 2 42 9 0 2 54 9 0 2 54 N 78 +99. 
5 +Contig132_chr7_20426224_20428145 1815 A G 28.3 chr7 20428041 A 11 1 2 43 12 0 2 63 19 0 2 84 23 0 2 96 14 0 2 69 10 0 2 57 N 11 0.264 0 +Contig206_chr7_26281823_26282074 103 C A 101.0 chr7 26281925 T 11 0 2 60 16 0 2 61 19 0 2 84 6 0 2 45 19 0 2 84 16 0 2 75 N -1 0.947 1 +Contig55_chr7_53147505_53148974 894 A G 68.4 chr7 53148397 G 22 0 2 93 13 0 2 66 16 0 2 75 8 0 2 51 16 0 2 75 11 0 2 60 Y 19 0.060 0 +Contig4_chr7_53685534_53688206 1709 C G 76.2 chr7 53687225 C 18 0 2 81 17 0 2 78 18 0 2 81 15 0 2 72 14 0 2 69 14 0 2 69 Y 32 0.659 1 +Contig61_chr7_55832923_55834065 506 T C 185.0 chr7 55833450 C 9 0 2 54 10 0 2 57 22 0 2 93 12 0 2 63 12 0 2 63 7 0 2 48 Y 1 0.019 0 +Contig91_chr8_12804505_12805470 409 C A 111.0 chr8 12804906 C 8 0 2 51 10 0 2 57 15 0 2 72 12 0 2 63 14 0 2 69 15 0 2 72 N 145 0.175 0 +Contig8_chr8_27811135_27812620 333 C T 37.9 chr8 27811458 C 4 0 2 39 11 0 2 60 18 0 2 81 5 0 2 42 6 0 2 45 5 0 2 42 Y 1 0.272 0 +Contig66_chr8_28273102_28273660 175 G C 81.6 chr8 28273263 T 9 0 2 54 17 0 2 78 19 0 2 84 8 0 2 51 16 0 2 75 19 0 2 84 Y 3 2.735 0 +Contig84_chr8_31375511_31376456 443 T C 125.0 chr8 31375954 T 10 0 2 57 15 0 2 72 27 0 2 108 18 0 2 81 16 0 2 75 9 0 2 54 Y 2 0.650 0 +Contig18_chr8_32575859_32577431 264 T C 151.0 chr8 32576124 T 20 0 2 87 14 0 2 69 17 0 2 78 14 0 2 69 13 0 2 66 14 0 2 69 Y 17 0.915 1 +Contig54_chr8_40913908_40916451 1275 G A 175.0 chr8 40915190 G 10 0 2 57 8 0 2 51 11 0 2 60 7 0 2 48 8 0 2 51 9 0 2 54 Y 21 0.056 3 +Contig93_chr8_44658786_44659075 180 T G 55.3 chr8 44658964 T 4 0 2 39 3 0 2 36 6 0 2 45 5 0 2 45 5 0 2 42 4 0 2 39 N 14 0.188 0 +Contig66_chr8_58562376_58563446 345 C G 5.74 chr8 58562721 C 14 0 2 69 12 0 2 63 9 0 2 57 10 0 2 57 9 0 2 54 10 0 2 57 Y 6 0.685 0 +Contig44_chr8_71186368_71188207 1455 G T 147.0 chr8 71187818 G 4 10 1 74 3 0 2 36 20 0 2 87 12 0 2 63 8 0 2 51 10 0 2 57 Y 88 0.036 0 +Contig73_chr9_29451535_29452248 616 A G 24.7 chr9 29452127 G 4 0 2 39 7 0 2 48 1 0 2 30 4 0 2 39 7 0 2 48 6 0 2 45 N 49 
0.448 4 +Contig96_chr9_39008495_39009278 215 A C 98.7 chr9 39008708 C 7 0 2 48 13 0 2 66 28 0 2 111 16 0 2 75 17 0 2 78 17 0 2 78 Y 8 0.427 1 +Contig69_chr10_40547265_40548153 371 G A 58.1 chr10 40547649 A 9 0 2 54 8 0 2 51 8 0 2 51 9 0 2 54 4 0 2 39 5 0 2 42 Y 20 0.138 4 +Contig63_chr10_42716594_42719945 1018 A G 88.7 chr10 42717616 G 13 0 2 66 14 0 2 69 13 0 2 66 12 0 2 63 18 0 2 81 5 0 2 42 Y 25 1.740 0 +Contig22_chr10_43255307_43255570 81 C A 37.2 chr10 43255383 C 15 0 2 72 18 0 2 81 22 0 2 93 16 0 2 75 11 0 2 60 12 0 2 63 N 62 0.450 0 +Contig9_chr10_51475063_51476054 770 C T 57.3 chr10 51475839 C 6 0 2 45 16 0 2 75 16 0 2 75 13 0 2 66 9 0 2 54 9 2 2 21 N 80 0.394 0 +Contig42_chr10_53816543_53818392 1642 G A 27.5 chr10 53818172 A 7 0 2 48 13 0 2 66 17 0 2 78 14 0 2 69 19 0 2 84 16 0 2 75 N 1 0.433 0 +Contig36_chr10_53992615_53993741 229 G C 86.2 chr10 53992846 G 17 0 2 78 14 0 2 69 13 0 2 66 15 0 2 72 12 0 2 63 15 0 2 72 N 23 1.912 0 +Contig20_chr10_58141129_58141750 575 C T 46.1 chr10 58141701 C 7 0 2 48 8 0 2 51 9 0 2 54 3 0 2 36 4 0 2 39 9 0 2 54 N 1 4.264 0 +Contig26_chr10_59510973_59511899 146 C A 29.0 chr10 59511126 C 8 0 2 51 13 0 2 66 18 0 2 81 13 0 2 66 10 0 2 57 7 0 2 48 Y 208 1.077 0 +Contig72_chr11_7142765_7143772 146 G A 152.0 chr11 7142911 A 8 0 2 51 8 0 2 51 24 0 2 99 10 0 2 57 17 0 2 78 11 0 2 60 Y 90 1.137 0 +Contig9_chr11_9904571_9905983 1284 C T 151.0 chr11 9905857 C 16 0 2 75 19 0 2 84 17 0 2 78 16 0 2 75 12 0 2 63 13 1 2 44 Y 11 0.422 1 +Contig7_chr11_40017076_40017630 352 C T 46.3 chr11 40017422 C 7 0 2 48 9 0 2 54 6 0 2 45 8 0 2 51 16 0 2 75 9 0 2 54 Y 44 0.336 0 +Contig108_chr11_42953408_42955156 367 A G 89.4 chr11 42953779 A 17 0 2 78 11 0 2 60 14 0 2 69 20 0 2 87 14 0 2 69 17 0 2 78 Y 118 0.784 1 +Contig16_chr11_53408448_53408790 187 A G 153.0 chr11 53408638 A 7 0 2 48 9 0 2 54 18 0 2 81 10 0 2 57 11 0 2 60 12 0 2 63 Y 116 1.367 0 +Contig21_chr12_18403415_18404381 586 G T 34.5 chr12 18403983 - 13 0 2 66 16 0 2 75 25 0 2 102 12 0 2 63 
12 0 2 63 14 0 2 69 Y 12 0.068 0 +Contig33_chr12_19804073_19804529 178 T C 69.4 chr12 19804261 T 13 0 2 66 13 0 2 66 22 0 2 93 11 0 2 60 12 0 2 63 18 0 2 81 Y 11 1.571 0 +Contig41_chr12_25565452_25566993 475 G T 6.29 chr12 25565926 G 15 0 2 72 14 0 2 69 10 0 2 57 15 0 2 72 18 0 2 81 19 0 2 84 N 10 2.231 1 +Contig9_chr12_27204351_27204696 239 A G 145.0 chr12 27204587 A 7 0 2 48 8 0 2 51 12 0 2 63 8 0 2 51 11 0 2 60 11 0 2 60 Y 14 0.046 0 +Contig45_chr12_30548282_30550498 448 C T 124.0 chr12 30548703 - 9 0 2 54 11 0 2 60 22 0 2 93 19 0 2 84 12 0 2 63 12 0 2 63 Y 66 0.305 0 +Contig46_chr12_35571846_35572563 58 G C 83.2 chr12 35571906 G 4 0 2 39 10 0 2 57 11 0 2 60 6 0 2 45 10 0 2 57 6 0 2 45 Y 55 +99. 1 +Contig28_chr12_42075871_42076044 136 G A 134.0 chr12 42076006 A 6 0 2 45 5 0 2 42 7 0 2 48 7 0 2 48 2 0 2 33 4 0 2 39 N 3 9.479 0 +Contig16_chr12_42386141_42387454 194 A G 161.0 chr12 42386323 A 11 0 2 60 8 0 2 54 23 0 2 96 17 0 2 78 6 0 2 45 13 0 2 66 Y 7 0.927 1 +Contig42_chr12_44424628_44425829 255 A G 84.4 chr12 44424879 A 12 0 2 63 19 0 2 84 23 0 2 96 15 0 2 72 18 0 2 81 14 0 2 69 Y 18 1.190 2 +Contig10_chr12_44447953_44449698 63 C T 105.0 chr12 44448020 C 11 0 2 60 9 0 2 54 12 0 2 63 10 0 2 57 15 0 2 72 8 0 2 51 Y 31 11.791 0 +Contig5_chr12_53880670_53882675 1221 A C 99.4 chr12 53881888 A 16 0 2 75 18 0 2 81 23 0 2 96 10 0 2 57 15 0 2 72 17 0 2 78 Y 31 0.061 0 +Contig86_chr12_56715356_56716464 818 T C 166.0 chr12 56716164 T 20 0 2 87 16 0 2 75 16 0 2 75 14 0 2 69 13 0 2 66 7 0 2 48 Y 22 1.092 0 +Contig3_chr12_65021967_65024097 238 T G 92.6 chr12 65022205 T 17 0 2 78 14 0 2 69 16 0 2 75 9 0 2 54 13 0 2 66 15 0 2 72 Y 258 0.117 0 +Contig43_chr12_66499742_66500010 121 G T 41.5 chr12 66499866 G 12 0 2 63 4 0 2 39 8 0 2 51 6 0 2 45 10 0 2 57 6 0 2 45 N 42 0.421 0 +Contig14_chr12_71364692_71365311 20 A C 103.0 chr12 71364712 A 7 0 2 48 3 0 2 36 5 0 2 42 1 0 2 30 2 0 2 33 3 0 2 36 Y 35 +99. 
0 +Contig37_chr13_15910164_15910426 245 G A 32.9 chr13 -1 N 3 4 1 41 4 0 2 39 3 0 2 36 4 0 2 39 3 0 2 36 10 0 2 57 N -1 2.159 1 +Contig107_chr13_26045881_26046290 341 C G 81.4 chr13 26046230 C 16 0 2 75 20 0 2 90 14 0 2 69 15 0 2 72 9 0 2 54 9 0 2 54 Y 51 4.510 0 +Contig251_chr13_28498333_28501066 864 T G 296.0 chr13 28499180 T 3 0 2 36 5 0 2 42 4 0 2 39 2 0 2 33 5 0 2 42 6 0 2 45 Y 9 0.068 0 +Contig154_chr13_36777857_36778736 356 G A 95.5 chr13 36778225 A 6 0 2 45 11 0 2 60 11 0 2 60 9 0 2 54 13 0 2 66 8 0 2 51 Y 59 0.192 0 +Contig37_chr13_42529793_42530857 150 G T 192.0 chr13 42529926 G 18 0 2 81 14 0 2 69 16 0 2 75 14 0 2 69 8 0 2 51 11 0 2 60 N 22 0.795 5 +Contig47_chr13_47045833_47046626 257 A C 28.5 chr13 47046097 A 13 0 2 66 10 0 2 57 17 0 2 78 20 0 2 87 15 0 2 72 9 0 2 57 N 129 0.468 0 +Contig42_chr13_47730018_47730856 254 A G 75.1 chr13 47730294 A 13 0 2 66 6 0 2 45 12 0 2 63 9 0 2 54 16 0 2 75 11 0 2 63 Y 630 0.049 1 +Contig55_chr13_53467708_53468101 221 T G 132.0 chr13 53467925 T 25 0 2 102 12 0 2 63 26 0 2 105 7 0 2 48 16 0 2 75 16 0 2 75 N 20 5.717 1 +Contig49_chr13_55103679_55105532 503 G A 76.0 chr13 55104178 G 21 0 2 90 19 0 2 84 18 0 2 81 20 0 2 87 8 9 1 89 17 0 2 78 Y 20 0.259 1 +Contig66_chr13_66021813_66022244 319 C T 125.0 chr13 66022136 C 11 0 2 60 16 0 2 75 15 0 2 75 12 0 2 63 17 0 2 78 8 0 2 51 N 14 0.055 3 +Contig48_chr14_11839435_11843272 3014 A G 163.0 chr14 11842446 A 10 0 2 57 8 0 2 51 13 0 2 66 10 0 2 57 5 0 2 42 10 0 2 57 Y 31 0.908 0 +Contig9_chr14_23353717_23354432 80 G A 61.3 chr14 23353797 G 3 0 2 36 6 0 2 45 11 0 2 60 8 0 2 51 4 0 2 39 2 4 1 35 Y 11 0.444 0 +Contig14_chr14_24131180_24133488 1633 G A 131.0 chr14 24132818 G 21 0 2 90 16 0 2 75 12 0 2 63 10 0 2 57 11 0 2 60 20 0 2 87 Y 36 0.347 0 +Contig28_chr14_26905747_26909514 975 G C 3.13 chr14 26906723 G 16 0 2 75 10 0 2 57 12 0 2 63 15 0 2 72 10 0 2 57 7 0 2 48 N 287 0.117 2 +Contig14_chr14_29616948_29618316 109 G A 80.3 chr14 29617053 - 17 0 2 78 16 0 2 75 16 0 2 75 10 0 2 57 
17 0 2 78 19 0 2 84 Y 32 1.051 0 +Contig76_chr14_30028102_30029179 1046 C T 38.5 chr14 30029169 T 3 0 2 36 6 0 2 45 9 0 2 54 7 0 2 48 9 0 2 54 8 0 2 51 Y 96 +99. 0 +Contig115_chr14_31417207_31417574 259 A G 12.1 chr14 31417454 G 13 0 2 66 15 0 2 72 21 0 2 90 12 0 2 63 13 0 2 66 9 0 2 54 N 28 5.379 2 +Contig70_chr14_46653662_46653790 111 G A 46.7 chr14 46653768 G 7 0 2 48 5 0 2 42 11 0 2 60 11 0 2 60 8 0 2 51 10 0 2 57 N 21 +99. 2 +Contig43_chr14_49991855_49993511 918 A G 112.0 chr14 49992767 G 15 0 2 72 10 0 2 57 11 0 2 63 9 0 2 54 12 0 2 63 9 0 2 54 Y 6 0.314 1 +Contig64_chr14_56768376_56768902 473 C T 29.0 chr14 56768832 C 15 0 2 72 11 0 2 60 14 0 2 69 14 0 2 69 7 0 2 48 9 0 2 54 Y 91 8.281 0 +Contig60_chr15_18493036_18494316 150 G A 92.6 chr15 18493188 G 9 0 2 54 13 0 2 66 9 0 2 54 6 0 2 45 5 0 2 42 12 0 2 63 Y 45 0.125 0 +Contig59_chr15_22138344_22138535 120 G C 142.0 chr15 22138470 C 11 0 2 60 10 0 2 57 18 0 2 81 4 0 2 39 10 0 2 57 15 0 2 72 N 8 2.553 0 +Contig112_chr15_26772864_26773267 374 C T 21.6 chr15 26773244 C 4 0 2 39 4 0 2 39 5 0 2 42 2 0 2 33 4 0 2 39 3 0 2 36 N 18 +99. 0 +Contig24_chr15_26894765_26895003 155 G A 87.6 chr15 -1 N 6 0 2 45 5 0 2 42 7 0 2 48 4 0 2 39 4 0 2 39 2 0 2 33 N -1 0.178 0 +Contig2_chr15_33944796_33947182 1860 G A 99.5 chr15 33946654 G 10 0 2 57 11 0 2 60 16 0 2 75 14 0 2 69 14 0 2 69 16 0 2 75 Y 16 0.252 0 +Contig73_chr15_34690052_34691332 714 T C 130.0 chr15 34690769 T 7 0 2 48 7 0 2 48 17 0 2 78 9 0 2 54 9 0 2 54 4 0 2 39 Y 7 6.003 0 +Contig68_chr15_37747190_37747426 126 G A 130.0 chr15 37747331 G 14 0 2 69 14 0 2 69 11 0 2 63 19 0 2 84 13 0 2 66 21 0 2 90 N 229 0.255 0 +Contig104_chr15_45106954_45107158 70 A T 64.4 chr15 45107015 A 6 0 2 45 6 0 2 45 19 0 2 84 7 0 2 48 7 0 2 48 3 0 2 36 N 202 4.319 0 +Contig119_chr16_6160274_6160477 180 G A 54.8 chr16 6160457 G 7 0 2 48 6 0 2 45 12 0 2 63 3 0 2 36 11 0 2 60 10 0 2 57 N 42 +99. 
0 +Contig126_chr16_10611887_10612152 150 G T 145.0 chr16 10612037 G 14 0 2 69 9 0 2 54 11 0 2 63 8 0 2 51 8 0 2 51 11 0 2 60 N 15 0.104 6 +Contig43_chr16_20200090_20200514 70 A G 58.6 chr16 20200154 A 11 0 2 60 15 0 2 72 15 0 2 72 6 0 2 45 9 0 2 54 12 0 2 63 Y 2 0.466 1 +Contig60_chr16_28079136_28080263 588 T G 157.0 chr16 28079739 T 22 0 2 93 20 0 2 87 22 0 2 93 17 0 2 78 12 0 2 63 10 0 2 57 Y 105 5.999 1 +Contig70_chr16_33758668_33759655 104 A T 58.1 chr16 33758772 A 6 0 2 45 7 0 2 48 17 0 2 78 14 0 2 69 8 0 2 51 10 0 2 57 N 54 0.162 0 +Contig66_chr16_37935682_37935831 116 T C 99.2 chr16 37935802 C 12 0 2 63 6 0 2 45 19 0 2 84 12 0 2 63 13 0 2 66 17 0 2 78 N 266 +99. 2 +Contig16_chr16_40451506_40451643 84 A G 59.8 chr16 40451592 A 7 0 2 48 5 0 2 42 7 0 2 48 13 0 2 66 14 0 2 69 19 0 2 84 N 45 5.061 0 +Contig31_chr17_12128267_12129637 205 G A 90.5 chr17 12128484 G 7 0 2 48 6 0 2 45 6 0 2 45 11 0 2 60 7 0 2 48 4 0 2 39 Y 10 0.246 0 +Contig1_chr17_12979232_12980380 808 G T 12.3 chr17 12980028 G 18 0 2 81 12 0 2 63 21 0 2 90 13 0 2 66 22 0 2 93 18 0 2 81 Y 9 0.336 1 +Contig42_chr17_23434859_23438330 2100 C T 39.5 chr17 23436985 T 4 0 2 39 7 0 2 48 7 0 2 48 3 0 2 36 6 0 2 45 2 0 2 33 Y 25 0.344 0 +Contig63_chr17_23796320_23796814 220 A G 54.0 chr17 23796536 G 6 0 2 45 4 0 2 39 5 0 2 42 6 0 2 45 4 0 2 39 6 0 2 45 Y 139 0.067 1 +Contig76_chr17_24107434_24107834 316 T C 141.0 chr17 24107726 T 19 0 2 84 15 0 2 72 20 0 2 87 16 0 2 75 11 0 2 60 18 0 2 81 Y 30 0.175 2 +Contig99_chr17_26021506_26022200 505 C T 88.8 chr17 26022017 T 15 0 2 72 13 0 2 66 19 0 2 84 9 0 2 54 10 0 2 57 11 0 2 60 Y 1 0.172 1 +Contig59_chr17_26790302_26795045 287 C T 45.1 chr17 26790582 C 8 0 2 51 6 0 2 45 13 0 2 66 6 0 2 45 15 0 2 72 12 0 2 63 Y 75 0.019 1 +Contig99_chr17_27018324_27019378 446 G A 31.1 chr17 27018776 G 14 0 2 69 12 0 2 63 14 0 2 69 10 0 2 57 9 0 2 54 11 0 2 60 Y 13 0.290 4 +Contig125_chr17_27739115_27739410 63 G A 107.0 chr17 27739177 G 8 0 2 51 11 0 2 60 16 0 2 75 8 0 2 51 4 0 2 39 
15 0 2 72 N 100 0.819 0 +Contig115_chr17_37489899_37490101 159 G A 62.4 chr17 37490067 G 4 0 2 39 3 0 2 36 4 0 2 39 4 0 2 39 3 0 2 36 6 0 2 45 N 4 1.411 1 +Contig180_chr17_45154356_45154925 524 A G 146.0 chr17 45154886 G 7 0 2 48 9 0 2 54 7 0 2 48 9 0 2 54 4 0 2 39 8 0 2 51 Y 11 +99. 2 +Contig61_chr17_48221795_48223545 1404 T A 177.0 chr17 48223216 T 15 0 2 72 14 0 2 69 24 0 2 99 17 0 2 78 18 0 2 81 24 0 2 99 Y 161 0.633 2 +Contig27_chr17_61713766_61716585 1056 G C 40.0 chr17 61714821 G 4 0 2 39 8 0 2 51 10 0 2 57 6 0 2 45 6 0 2 45 3 0 2 36 N 6 2.200 4 +Contig229_chr18_3706523_3708577 1076 A G 83.9 chr18 3707630 A 11 0 2 60 13 0 2 66 26 0 2 105 11 0 2 60 15 0 2 72 17 0 2 78 Y 63 0.445 0 +Contig24_chr18_14049894_14050480 24 A G 123.0 chr18 14049918 A 5 0 2 42 5 0 2 42 4 0 2 39 6 0 2 45 7 0 2 48 5 0 2 42 Y 17 +99. 0 +Contig123_chr18_19916160_19916379 116 G A 79.2 chr18 19916272 A 14 0 2 69 12 0 2 63 14 0 2 69 6 0 2 45 11 0 2 60 10 0 2 57 N 26 0.172 0 +Contig82_chr18_27305489_27306229 566 C T 49.5 chr18 27306051 A 6 0 2 45 6 0 2 45 10 0 2 57 11 0 2 60 6 0 2 45 7 0 2 48 N 1 0.349 0 +Contig71_chr18_34324706_34326687 136 G A 151.0 chr18 34324841 G 9 0 2 54 9 0 2 54 17 0 2 78 8 0 2 51 11 0 2 60 10 0 2 57 Y 2 2.129 2 +Contig16_chr18_34672093_34673044 538 T C 58.2 chr18 34672635 T 8 0 2 51 15 0 2 72 16 0 2 75 15 0 2 72 9 0 2 57 18 0 2 81 Y 8 0.214 1 +Contig96_chr18_38492535_38493333 624 G A 119.0 chr18 38493162 T 17 0 2 78 12 0 2 63 13 0 2 66 16 0 2 75 8 0 2 51 15 0 2 72 Y 127 0.131 0 +Contig226_chr18_47753756_47754666 427 T C 21.1 chr18 47754215 T 10 0 2 57 4 0 2 39 8 0 2 51 5 0 2 42 6 0 2 45 7 0 2 48 Y 42 0.522 0 +Contig170_chr18_49411558_49412230 94 C A 74.3 chr18 49411655 C 14 0 2 69 10 0 2 57 9 0 2 54 10 0 2 57 3 0 2 36 3 0 2 36 N 9 1.457 0 +Contig192_chr18_49419342_49420737 1058 C T 42.8 chr18 49420381 A 3 0 2 36 4 0 2 39 5 0 2 42 8 0 2 51 3 0 2 36 3 0 2 36 Y 34 2.107 2 +Contig64_chr18_55979770_55980315 49 G A 89.1 chr18 55979824 G 3 0 2 36 9 0 2 54 7 0 2 51 4 0 2 39 
3 0 2 36 3 0 2 36 Y -1 2.124 0 +Contig20_chr18_58130301_58130735 112 A G 74.4 chr18 58130413 A 12 0 2 66 11 0 2 60 11 0 2 60 12 0 2 63 6 0 2 45 6 0 2 45 Y 10 0.290 0 +Contig67_chr19_12398520_12399367 499 C T 161.0 chr19 12399017 C 10 0 2 57 11 0 2 60 20 0 2 87 14 0 2 69 24 0 2 99 8 0 2 51 Y 137 5.634 0 +Contig66_chr19_16285672_16287223 996 C T 190.0 chr19 16286674 C 9 0 2 57 14 0 2 69 16 0 2 78 17 0 2 78 8 0 2 51 22 0 2 93 Y 40 0.110 0 +Contig129_chr19_25541958_25542221 202 T C 68.1 chr19 25542154 C 11 0 2 60 19 0 2 84 10 0 2 60 17 0 2 78 9 0 2 54 12 0 2 63 N -1 2.551 1 +Contig29_chr19_37339947_37341911 1692 C T 211.0 chr19 37341631 C 15 0 2 72 20 0 2 87 11 0 2 60 15 0 2 72 3 0 2 36 12 0 2 63 Y 7 0.096 0 +Contig39_chr19_47709708_47711327 444 C T 36.8 chr19 47710148 T 10 0 2 57 4 0 2 39 8 0 2 51 9 0 2 54 6 0 2 45 6 0 2 45 Y 95 1.251 1 +Contig60_chr19_54013816_54014398 281 A G 138.0 chr19 54014103 C 6 0 2 45 15 0 2 72 7 0 2 48 10 0 2 57 15 0 2 72 10 0 2 57 Y 188 1.271 0 +Contig251_chr19_56559098_56559626 452 T C 3.36 chr19 56559549 T 12 0 2 63 13 0 2 66 21 0 2 90 15 0 2 72 14 0 2 69 11 0 2 60 N 1 0.117 0 +Contig50_chr20_12138509_12141975 3206 C A 248.0 chr20 12141763 C 8 0 2 51 15 0 2 72 14 0 2 69 6 0 2 45 10 0 2 57 7 0 2 48 Y 2 0.384 0 +Contig36_chr20_32631363_32632049 176 G A 24.1 chr20 32631526 G 7 0 2 48 14 0 2 69 19 0 2 84 14 0 2 69 15 0 2 72 16 0 2 75 N 50 1.150 0 +Contig32_chr20_36468058_36468869 66 C T 40.4 chr20 36468127 C 6 0 2 45 3 0 2 36 4 0 2 39 5 0 2 42 3 0 2 36 4 0 2 39 N 59 0.281 0 +Contig24_chr20_38203888_38204900 834 C T 132.0 chr20 38204731 C 9 0 2 54 17 0 2 78 20 0 2 87 8 0 2 51 11 0 2 60 17 0 2 78 Y 14 0.397 0 +Contig79_chr20_44263127_44264103 456 G T 31.5 chr20 44263573 G 22 0 2 93 16 0 2 75 15 0 2 72 19 0 2 84 13 0 2 66 26 0 2 105 Y 8 3.250 0 +Contig26_chr20_45878482_45878787 197 A G 160.0 chr20 45878672 A 17 0 2 78 15 0 2 72 11 0 2 63 17 0 2 78 12 0 2 63 10 0 2 57 N 14 0.535 0 +Contig119_chr20_46550670_46551383 609 G A 139.0 chr20 46551277 G 7 
0 2 48 17 0 2 78 19 0 2 84 20 0 2 87 9 0 2 54 15 0 2 72 Y 7 0.488 1 +Contig50_chr21_4178523_4178687 121 G A 362.0 chr21 4178640 G 8 0 2 51 14 0 2 69 5 0 2 42 3 0 2 36 11 0 2 60 4 0 2 39 N 392 0.483 0 +Contig103_chr21_10177255_10177765 121 G A 125.0 chr21 10177367 G 12 0 2 63 10 0 2 57 10 0 2 57 17 0 2 78 14 0 2 69 7 0 2 51 Y 37 0.213 3 +Contig1_chr21_10805534_10806399 766 A G 146.0 chr21 10806301 G 10 0 2 57 6 0 2 45 9 0 2 54 6 0 2 45 7 0 2 48 5 0 2 42 Y 20 0.319 0 +Contig46_chr21_21029492_21030645 443 C T 5.37 chr21 21029910 C 15 0 2 72 11 0 2 60 16 0 2 75 15 0 2 72 13 0 2 66 6 0 2 45 Y 96 3.737 0 +Contig129_chr21_31045749_31046924 381 A G 129.0 chr21 31046141 A 19 0 2 84 8 0 2 51 23 0 2 96 12 0 2 63 15 0 2 72 18 0 2 81 Y 69 0.028 2 +Contig23_chr21_31651123_31651986 840 C T 71.3 chr21 31651957 T 6 0 2 45 9 0 2 54 8 0 2 51 10 0 2 57 4 0 2 39 7 0 2 48 Y 105 2.977 3 +Contig64_chr21_43341847_43342031 84 T C 114.0 chr21 43341926 T 11 0 2 60 9 0 2 54 10 0 2 57 6 0 2 45 6 0 2 45 7 0 2 48 N 10 3.954 2 +Contig60_chr21_43475347_43475824 175 C T 8.05 chr21 43475551 T 6 0 2 45 7 0 2 48 13 0 2 66 6 0 2 45 14 0 2 69 14 0 2 69 N 45 0.058 0 +Contig159_chr22_7896450_7896974 109 G C 151.0 chr22 7896570 G 16 0 2 75 5 7 1 62 14 0 2 69 16 0 2 75 13 0 2 66 13 0 2 66 Y 16 0.465 0 +Contig46_chr22_9416920_9417467 381 G A 145.0 chr22 9417259 G 10 0 2 57 9 0 2 54 10 0 2 57 6 0 2 45 13 0 2 66 7 0 2 48 Y 154 0.242 0 +Contig86_chr22_9440787_9441725 713 T G 119.0 chr22 9441488 G 6 0 2 45 12 0 2 63 10 0 2 57 11 0 2 60 13 0 2 66 16 0 2 75 Y 132 0.218 0 +Contig16_chr22_15636960_15637372 236 A C 9.79 chr22 15637192 T 4 0 2 39 5 0 2 42 12 0 2 63 7 0 2 48 6 0 2 45 11 0 2 60 Y 5 2.163 0 +Contig4_chr22_16114310_16114546 128 G C 101.0 chr22 16114432 G 10 0 2 57 13 0 2 66 20 0 2 87 20 0 2 87 16 0 2 75 9 0 2 54 N 19 0.526 0 +Contig23_chr22_34612023_34612568 167 C G 92.3 chr22 34612181 C 11 0 2 60 18 0 2 81 13 0 2 66 8 0 2 51 12 0 2 63 14 0 2 69 Y 7 0.409 0 +Contig4_chr22_38252245_38253712 799 A C 159.0 
chr22 38253064 A 18 0 2 81 15 0 2 72 15 0 2 72 20 0 2 87 27 0 2 108 15 0 2 72 Y 90 4.330 0 +Contig122_chr22_48412466_48414788 1888 C T 125.0 chr22 48414355 T 16 0 2 75 15 0 2 72 16 0 2 75 14 0 2 72 12 0 2 63 7 0 2 48 N 42 0.122 0 +Contig77_chr22_49764414_49764875 353 C A 148.0 chr22 49764777 C 7 4 1 65 18 0 2 81 16 0 2 75 20 0 2 87 4 3 1 52 9 4 1 67 Y 12 0.941 0 +Contig26_chr22_57817664_57819633 1453 A G 150.0 chr22 57819121 G 9 0 2 54 9 0 2 54 13 0 2 66 15 0 2 72 11 0 2 60 14 0 2 69 N 15 0.471 1 +Contig348_chr22_62406104_62406495 189 C A 134.0 chr22 62406302 A 9 0 2 54 14 0 2 69 11 0 2 60 10 0 2 57 12 0 2 63 6 0 2 45 Y 5 0.912 0 +Contig133_chr23_3525134_3526502 1223 A G 201.0 chr23 3526387 A 11 0 2 60 13 0 2 66 23 0 2 96 21 0 2 90 13 0 2 66 10 0 2 57 Y 61 1.359 0 +Contig111_chr23_7058063_7058181 107 G A 108.0 chr23 7058162 A 8 0 2 51 8 0 2 51 7 0 2 48 2 0 2 33 5 0 2 42 6 0 2 45 N 3 +99. 0 +Contig79_chr23_7844129_7844837 110 C A 141.0 chr23 7844237 T 13 0 2 66 15 0 2 72 17 0 2 78 12 0 2 63 15 0 2 72 16 0 2 75 Y 40 0.339 0 +Contig38_chr23_9201002_9201725 597 C T 155.0 chr23 9201609 T 17 0 2 78 8 0 2 51 13 0 2 66 5 0 2 42 11 0 2 60 7 0 2 48 Y 167 0.633 1 +Contig33_chr23_20672540_20674320 347 T A 91.4 chr23 20672885 A 11 0 2 60 14 0 2 69 15 0 2 72 7 0 2 48 12 0 2 63 18 0 2 81 Y 31 0.452 1 +Contig35_chr23_28447813_28449115 70 T A 21.3 chr23 28447881 T 9 0 2 54 8 0 2 51 10 0 2 57 9 0 2 54 10 0 2 57 12 0 2 63 N 251 0.163 1 +Contig51_chr23_30590939_30591162 140 C T 142.0 chr23 30591080 C 14 0 2 69 4 0 2 39 10 0 2 57 12 0 2 63 14 0 2 69 4 0 2 39 N 13 1.658 0 +Contig57_chr23_32216351_32216721 179 T G 143.0 chr23 32216534 T 15 0 2 72 15 0 2 72 23 0 2 96 13 0 2 66 16 0 2 75 15 0 2 72 N 32 1.387 1 +Contig93_chr23_35744841_35745791 40 A T 30.4 chr23 35744880 T 6 0 2 45 7 0 2 48 7 0 2 48 2 0 2 33 5 0 2 42 5 0 2 42 Y 50 2.173 0 +Contig32_chr23_48285289_48286638 186 T C 176.0 chr23 48285470 T 18 0 2 81 12 0 2 63 16 0 2 75 13 0 2 66 9 0 2 54 9 0 2 54 Y 4 4.238 1 
+Contig50_chr24_22515247_22516072 761 C T 243.0 chr24 22515981 T 11 0 2 60 10 0 2 57 8 0 2 51 9 0 2 54 18 0 2 81 8 0 2 51 Y 1 0.190 0 +Contig84_chr24_29196623_29199644 466 C T 126.0 chr24 29197091 T 7 0 2 48 11 0 2 60 8 0 2 51 7 0 2 48 11 0 2 60 15 0 2 72 Y 42 0.215 0 +Contig145_chr24_34778364_34778898 163 T C 372.0 chr24 34778541 C 10 0 2 57 8 0 2 51 12 0 2 63 12 0 2 63 6 1 2 31 7 0 2 48 Y 40 0.037 0 +Contig34_chr24_36147443_36150244 2679 C T 140.0 chr24 36150125 C 13 0 2 66 7 0 2 48 14 0 2 69 14 0 2 69 10 0 2 57 13 0 2 66 N 282 0.099 1 +Contig164_chr24_46598127_46599206 84 C T 105.0 chr24 46598214 C 13 0 2 66 12 0 2 63 15 0 2 72 15 0 2 72 11 0 2 60 8 0 2 51 Y 22 1.262 1 +Contig144_chr25_4011170_4013134 541 A G 160.0 chr25 4011690 A 12 0 2 63 17 0 2 78 13 0 2 66 13 0 2 66 13 0 2 66 13 0 2 66 Y 5 0.087 0 +Contig81_chr25_6103472_6104760 699 G A 378.0 chr25 6104190 A 14 0 2 69 16 0 2 75 13 0 2 66 11 0 2 60 11 0 2 60 12 0 2 63 Y 33 0.789 2 +Contig152_chr25_7486442_7487609 75 A G 11.6 chr25 7486515 A 17 0 2 78 13 0 2 66 8 0 2 51 16 0 2 75 8 0 2 51 6 0 2 45 N 2 0.158 0 +Contig24_chr25_7695778_7698612 2714 C T 130.0 chr25 7698446 C 16 0 2 75 13 0 2 66 22 0 2 93 17 0 2 78 10 0 2 57 17 0 2 78 Y 27 0.346 0 +Contig89_chr25_8635170_8636009 586 G C 209.0 chr25 8635744 G 13 0 2 66 13 0 2 66 21 0 2 93 14 0 2 69 15 0 2 72 15 0 2 72 Y 14 0.067 0 +Contig59_chr25_18196776_18197707 785 G A 112.0 chr25 18197551 G 8 10 1 42 27 0 2 108 21 0 2 90 18 0 2 81 10 0 2 57 14 0 2 69 N 36 3.625 0 +Contig103_chr25_38891221_38892140 407 G A 131.0 chr25 38891644 G 8 0 2 51 14 0 2 69 18 0 2 81 8 0 2 51 8 0 2 51 11 0 2 60 Y 149 0.167 4 +Contig84_chr25_42407960_42408708 55 C T 119.0 chr25 42408013 C 6 0 2 45 9 0 2 54 11 0 2 60 9 0 2 54 7 0 2 48 8 0 2 51 Y 11 0.121 0 +Contig73_chr25_43562500_43564110 955 T C 52.1 chr25 43563469 C 9 0 2 57 4 0 2 39 6 0 2 45 5 0 2 42 7 0 2 48 10 0 2 57 Y 4 1.406 0 +Contig37_chr25_51074433_51074885 170 A G 102.0 chr25 51074589 G 11 0 2 60 7 0 2 48 6 0 2 45 15 0 2 72 9 0 2 
54 7 0 2 48 Y 68 0.207 1 +Contig204_chr26_4311195_4311778 170 C T 16.9 chr26 4311363 T 20 0 2 87 8 0 2 51 13 0 2 66 18 0 2 81 11 0 2 60 14 0 2 69 N 35 0.085 0 +Contig122_chr26_7622321_7623491 106 C G 139.0 chr26 7622423 C 3 0 2 36 9 0 2 54 10 0 2 57 12 0 2 63 9 0 2 54 5 0 2 42 N 19 0.458 0 +Contig11_chr26_11062142_11062902 707 C A 108.0 chr26 11062836 T 7 0 2 48 8 0 2 51 16 0 2 75 10 0 2 57 6 0 2 45 14 0 2 69 Y -1 4.709 0 +Contig133_chr26_17695661_17696368 39 T G 98.7 chr26 17695700 T 10 0 2 57 3 0 2 36 11 0 2 60 9 0 2 54 2 0 2 33 1 0 2 30 N 85 3.402 0 +Contig146_chr26_26622638_26623906 574 G A 186.0 chr26 26623219 A 11 0 2 60 12 0 2 63 9 0 2 54 11 0 2 60 9 0 2 54 12 0 2 63 Y 1 0.318 0 +Contig8_chr26_27834126_27834326 140 G A 41.7 chr26 27834268 G 13 0 2 66 7 0 2 48 13 0 2 66 11 0 2 60 12 0 2 63 6 0 2 45 N 29 0.142 1 +Contig78_chr26_31128839_31129005 123 T C 145.0 chr26 -1 N 11 0 2 60 3 0 2 36 7 0 2 48 8 0 2 51 10 0 2 46 7 0 2 48 N -1 1.230 1 +Contig28_chr26_32935355_32935833 289 T C 77.9 chr26 32935638 T 15 0 2 72 22 0 2 93 15 0 2 72 9 0 2 54 15 0 2 72 17 0 2 78 Y 10 2.258 1 +Contig135_chr27_6853874_6854079 158 C T 116.0 chr27 6854032 T 18 0 2 81 19 0 2 84 13 0 2 66 7 0 2 48 8 0 2 51 11 0 2 60 N 4 0.060 1 +Contig47_chr27_11777710_11777915 25 A G 67.3 chr27 11777731 A 3 0 2 36 5 0 2 42 6 0 2 45 10 0 2 57 9 0 2 54 6 0 2 45 N 97 +99. 0 +Contig23_chr27_14633002_14633153 23 G A 128.0 chr27 14633023 A 3 0 2 36 4 0 2 39 5 0 2 42 5 0 2 42 3 0 2 36 2 0 2 33 N 240 3.881 0 +Contig29_chr27_15428166_15429413 380 T C 140.0 chr27 15428539 T 15 0 2 72 15 0 2 72 17 0 2 78 15 0 2 72 15 0 2 72 15 0 2 72 Y 47 0.916 1 +Contig31_chr27_19519489_19520891 129 G T 14.9 chr27 19519624 T 12 0 2 63 19 0 2 84 20 0 2 87 16 0 2 75 10 0 2 57 11 0 2 60 Y 48 2.756 0 +Contig35_chr27_40596169_40596445 20 G C 133.0 chr27 40596189 G 8 0 2 51 3 0 2 36 4 0 2 39 2 0 2 33 4 0 2 39 4 0 2 39 Y 4 +99. 
1 +Contig85_chr27_45471750_45472022 211 G A 53.1 chr27 45471964 G 18 0 2 81 10 0 2 57 15 0 2 72 0 13 0 36 16 0 2 75 14 0 2 69 N 75 2.502 1 +Contig131_chr28_6481806_6483783 138 C T 36.2 chr28 6481953 C 12 0 2 63 12 0 2 63 20 0 2 87 11 0 2 60 10 0 2 57 12 0 2 63 Y 10 0.387 0 +Contig141_chr28_10027332_10028242 780 T G 74.8 chr28 10028095 T 10 0 2 57 11 0 2 60 14 0 2 69 10 0 2 57 7 0 2 48 9 0 2 54 Y 19 3.348 0 +Contig144_chr28_15468203_15470548 743 G A 20.0 chr28 15468942 G 13 0 2 66 12 0 2 63 10 0 2 57 11 0 2 60 16 0 2 75 7 0 2 48 N 14 0.053 0 +Contig47_chr28_21311718_21312366 541 G A 116.0 chr28 21312258 G 9 0 2 54 6 0 2 45 12 0 2 63 6 0 2 45 5 0 2 45 12 0 2 63 N 9 0.240 0 +Contig60_chr28_30197166_30197364 92 T C 164.0 chr28 30197258 T 10 0 2 57 13 0 2 66 15 0 2 72 16 0 2 75 12 0 2 63 11 0 2 60 N 369 1.139 0 +Contig29_chr29_4726399_4727143 559 A T 163.0 chr29 4726955 A 15 0 2 72 18 0 2 81 18 0 2 81 16 0 2 75 11 0 2 60 14 0 2 72 Y 161 3.114 0 +Contig48_chr29_13129286_13130137 232 A G 92.2 chr29 13129514 G 13 0 2 66 11 0 2 60 19 0 2 84 16 0 2 75 11 0 2 60 17 0 2 78 Y 337 2.581 1 +Contig33_chr29_17000374_17000921 71 C T 48.6 chr29 17000441 - 4 0 2 39 9 0 2 54 12 0 2 66 10 0 2 57 7 0 2 48 4 0 2 39 N 26 5.491 0 +Contig34_chr29_17581796_17584016 2105 C T 126.0 chr29 17583890 T 14 0 2 69 11 0 2 60 18 0 2 81 12 0 2 63 10 0 2 57 10 0 2 57 Y 22 2.208 0 +Contig19_chr29_20976080_20977761 1007 G A 115.0 chr29 20977076 G 19 0 2 84 22 0 2 93 22 0 2 93 22 0 2 93 11 0 2 60 13 0 2 66 Y 4 1.915 0 +Contig51_chr29_21149853_21150467 266 C T 146.0 chr29 21150118 C 12 0 2 63 12 0 2 63 23 0 2 96 14 0 2 69 13 0 2 66 10 0 2 57 Y 4 0.051 0 +Contig1_chr30_5992217_5993068 106 C T 129.0 chr30 5992319 C 10 0 2 57 11 0 2 60 7 0 2 48 11 0 2 60 10 0 2 57 12 0 2 63 Y 76 1.079 0 +Contig1_chr30_8232878_8233406 402 C T 127.0 chr30 8233264 C 8 0 2 51 19 0 2 84 16 0 2 75 18 0 2 81 10 0 2 57 14 0 2 69 Y 358 5.283 0 +Contig108_chr30_9436961_9437520 546 C T 39.8 chr30 9437502 C 7 0 2 48 5 0 2 42 2 0 2 33 7 0 2 
48 5 0 2 42 7 0 2 48 Y 64 +99. 0 +Contig165_chr30_25804389_25804926 190 T C 126.0 chr30 25804592 C 3 0 2 36 8 0 2 51 7 0 2 48 10 0 2 57 7 0 2 48 4 0 2 39 Y 113 0.329 0 +Contig193_chr30_27495616_27496125 434 C A 234.0 chr30 27496024 C 13 0 2 66 16 0 2 75 25 0 2 102 16 0 2 75 13 0 2 66 14 0 2 69 Y 76 2.621 0 +Contig38_chr31_5164423_5166573 2074 C T 134.0 chr31 5166501 T 13 0 2 66 10 0 2 57 17 0 2 78 11 0 2 60 17 0 2 78 10 0 2 57 Y 58 +99. 0 +Contig6_chr31_9649308_9650149 431 G T 162.0 chr31 9649742 G 31 0 2 120 23 0 2 96 17 0 2 78 17 0 2 78 10 0 2 57 16 0 2 75 Y 98 2.200 0 +Contig7_chr31_12384974_12386400 305 C T 69.6 chr31 12385267 C 6 0 2 45 10 0 2 57 11 0 2 60 11 0 2 60 9 0 2 54 12 0 2 63 Y 44 1.165 0 +Contig90_chr31_17267583_17267778 81 C A 143.0 chr31 17267665 C 20 0 2 87 6 0 2 45 14 0 2 72 22 0 2 93 17 0 2 78 15 0 2 72 N 7 0.565 0 +Contig137_chr31_23357653_23358568 885 G A 119.0 chr31 23358545 G 5 0 2 42 3 0 2 36 3 0 2 36 2 0 2 33 3 0 2 36 4 0 2 39 Y 11 +99. 0 +Contig17_chr31_26433828_26434459 498 T C 9.79 chr31 26434322 T 18 0 2 81 10 0 2 57 15 0 2 72 13 0 2 66 16 0 2 75 15 0 2 72 Y 137 4.814 0 +Contig30_chr32_25902721_25905783 208 C G 162.0 chr32 25902927 G 11 0 2 60 13 0 2 66 11 0 2 60 12 0 2 63 7 0 2 48 11 0 2 60 Y 145 0.322 2 +Contig42_chr32_38900713_38901320 320 A G 134.0 chr32 38901021 T 12 0 2 63 10 0 2 57 9 11 1 104 5 0 2 42 19 0 2 84 7 6 1 56 Y 71 0.165 0 +Contig18_chr33_22207246_22209159 1363 G T 51.5 chr33 22208619 - 16 0 2 75 8 0 2 51 11 0 2 60 10 0 2 57 15 0 2 72 12 0 2 63 Y 59 2.560 0 +Contig104_chr33_22483642_22484187 424 C T 140.0 chr33 22484054 T 13 0 2 66 16 0 2 75 9 0 2 54 15 0 2 72 13 0 2 66 10 0 2 57 Y 36 0.404 0 +Contig170_chr33_26189421_26189940 292 T C 98.4 chr33 26189703 T 21 0 2 90 13 0 2 66 15 0 2 72 13 0 2 66 19 0 2 84 13 0 2 66 Y 23 0.307 0 +Contig41_chr34_16544482_16545449 46 T C 102.0 chr34 16544523 T 5 0 2 42 11 0 2 60 6 0 2 45 0 2 0 3 7 0 2 48 8 0 2 51 Y 215 1.156 0 +Contig8_chr34_18474513_18475673 1122 C A 129.0 chr34 18475628 
A 8 0 2 51 15 0 2 72 13 0 2 66 17 0 2 78 13 0 2 66 6 0 2 45 Y 61 0.123 2 +Contig152_chr34_31794848_31795540 242 G A 93.2 chr34 31795093 G 11 0 2 60 24 0 2 99 17 0 2 78 15 0 2 72 18 0 2 81 17 0 2 78 Y 123 2.780 0 +Contig28_chr34_41708848_41712034 1381 A G 78.2 chr34 41710232 A 11 0 2 60 17 0 2 78 15 0 2 72 16 0 2 75 15 0 2 72 14 0 2 69 Y 236 0.234 0 +Contig85_chr34_42798284_42800584 1845 C T 171.0 chr34 42800126 T 5 0 2 42 7 0 2 48 6 0 2 45 7 0 2 48 6 0 2 45 2 0 2 33 Y 5 2.787 0 +Contig47_chr35_3666773_3667898 348 G T 124.0 chr35 3667121 G 9 0 2 54 20 0 2 87 18 0 2 81 15 0 2 72 12 0 2 63 14 0 2 69 Y 285 0.235 0 +Contig195_chr35_15722500_15722741 205 G A 4.08 chr35 15722718 G 3 0 2 36 5 0 2 42 1 0 2 30 6 0 2 45 1 0 2 30 1 0 2 30 N 43 +99. 0 +Contig101_chr35_19513178_19513697 62 C T 112.0 chr35 19513238 C 12 0 2 63 7 0 2 48 13 0 2 66 7 0 2 48 5 0 2 42 8 0 2 51 N 115 3.135 0 +Contig47_chr35_24382042_24382526 33 G A 87.0 chr35 24382076 G 5 0 2 42 4 0 2 39 6 0 2 45 7 0 2 48 4 0 2 39 2 0 2 33 Y 71 +99. 
0 +Contig77_chr35_24796947_24797172 65 A G 52.1 chr35 24797009 A 7 0 2 48 5 0 2 42 8 0 2 51 6 0 2 45 12 0 2 63 10 0 2 57 N 11 1.401 3 +Contig74_chr35_25394343_25394813 303 A T 221.0 chr35 25394646 G 23 0 2 96 15 0 2 72 25 0 2 105 7 7 1 49 18 0 2 81 16 0 2 75 Y 58 4.298 0 +Contig5_chr36_4562983_4563634 343 C T 151.0 chr36 4563324 T 20 0 2 87 20 0 2 87 23 0 2 96 24 0 2 99 9 0 2 54 8 0 2 51 Y 40 1.169 0 +Contig75_chr36_7885319_7885588 53 G A 25.7 chr36 7885372 G 10 0 2 57 8 0 2 51 13 0 2 66 7 0 2 48 4 0 2 39 7 0 2 48 N 7 2.653 0 +Contig184_chr36_18956191_18958552 187 A G 11.5 chr36 18956371 G 10 0 2 57 11 0 2 60 21 0 2 90 14 0 2 69 7 0 2 48 4 0 2 39 N 278 1.434 2 +Contig12_chr36_21557176_21557828 513 T A 159.0 chr36 21557695 A 11 0 2 60 14 0 2 69 21 0 2 90 12 0 2 63 15 0 2 72 11 0 2 60 Y 55 0.222 0 +Contig2_chr36_22436067_22436794 653 C T 73.0 chr36 22436730 C 11 0 2 60 16 0 2 75 13 0 2 66 11 0 2 60 21 0 2 90 21 0 2 90 Y 9 0.534 0 +Contig133_chr36_32954045_32955409 136 A G 116.0 chr36 32954182 A 16 0 2 75 15 0 2 72 20 0 2 87 11 0 2 60 18 0 2 81 13 0 2 66 Y 74 3.772 1 +Contig53_chr37_6665763_6665919 116 C T 111.0 chr37 6665875 C 9 0 2 54 9 0 2 54 5 0 2 42 9 0 2 54 8 0 2 51 10 0 2 57 N 15 10.875 1 +Contig42_chr37_9589176_9591269 252 G A 25.1 chr37 9589430 G 10 0 2 40 13 0 2 66 18 0 2 81 21 0 2 90 9 0 2 54 17 0 2 78 N 67 1.170 2 +Contig2_chr37_17134963_17136513 1140 A C 158.0 chr37 17136092 A 14 0 2 69 24 0 2 99 17 0 2 78 16 0 2 75 15 0 2 75 13 0 2 66 Y 12 0.053 1 +Contig18_chr37_17147806_17149851 291 T G 112.0 chr37 17148084 T 4 6 1 45 16 0 2 75 17 0 2 78 14 0 2 69 22 0 2 93 13 0 2 66 Y 41 4.442 0 +Contig64_chr37_17606895_17607534 565 C T 30.2 chr37 17607439 A 9 0 2 54 16 0 2 75 20 0 2 87 14 0 2 69 16 0 2 75 10 0 2 57 N 20 1.622 0 +Contig126_chr37_21587881_21590621 373 G T 132.0 chr37 21588256 G 11 0 2 60 11 0 2 60 23 0 2 96 12 0 2 63 8 0 2 51 18 0 2 81 Y 12 0.549 0 +Contig2_chr37_31197993_31198256 182 C T 39.6 chr37 31198171 T 6 0 2 45 10 0 2 57 7 0 2 48 9 0 2 54 10 0 
2 57 12 0 2 63 N 2 0.595 0 +Contig46_chr37_31852376_31853555 825 A G 111.0 chr37 31853191 G 19 0 2 84 14 0 2 69 15 0 2 72 7 0 2 48 8 0 2 51 16 0 2 75 Y 17 0.128 1 +Contig7_chr38_12217200_12218387 1163 A T 44.4 chr38 12218353 A 11 0 2 60 13 0 2 66 17 0 2 78 10 0 2 57 11 0 2 60 11 0 2 60 Y 67 +99. 0 +Contig15_chr38_12282020_12282253 150 C T 156.0 chr38 12282164 A 17 0 2 78 11 0 2 60 19 0 2 84 14 0 2 69 5 0 2 42 14 0 2 69 Y 26 2.952 1 +Contig6_chr38_16185744_16186110 325 A G 74.9 chr38 16186061 A 5 0 2 42 3 0 2 36 9 0 2 54 7 0 2 48 1 0 2 30 12 0 2 63 Y 40 +99. 0 +Contig265_chrX_2689247_2689484 114 C G 103.0 chrX 2689356 C 11 0 2 60 9 0 2 54 13 0 2 66 16 0 2 75 14 0 2 69 10 0 2 57 N 2 9.232 1 +Contig122_chrX_6026976_6027327 330 C T 79.4 chrX 6027303 C 3 0 2 36 3 0 2 36 3 0 2 36 4 0 2 39 3 0 2 36 6 0 2 45 Y 30 +99. 0 +Contig113_chrX_26287829_26288398 385 C T 59.6 chrX 26288213 C 9 0 2 54 9 0 2 54 17 0 2 78 11 0 2 60 3 8 1 44 4 0 2 39 N 13 0.077 0 +Contig237_chrX_31256648_31257654 165 T A 246.0 chrX 31256814 T 7 0 2 48 23 0 2 96 19 0 2 84 17 0 2 78 14 0 2 69 8 0 2 51 Y 37 1.481 0 +Contig90_chrX_57430715_57431566 548 C T 116.0 chrX 57431266 T 9 0 2 54 18 0 2 81 13 0 2 66 14 0 2 69 8 0 2 54 7 0 2 48 Y 261 0.154 1 +Contig133_chrX_84833782_84834125 182 G A 69.7 chrX 84833962 G 5 0 2 42 18 0 2 81 12 0 2 63 19 0 2 84 6 3 1 27 7 0 2 48 N 619 0.278 0 +Contig125_chrX_93319363_93320877 349 A C 145.0 chrX 93319721 A 4 0 2 39 6 0 2 45 11 0 2 60 10 0 2 57 13 0 2 66 6 0 2 45 Y 59 1.686 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/pca/admix.geno Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,303 @@ +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +122222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +122222 +222222 +222222 +222222 +222222 +222222 +222222 +222212 +222222 +222222 +222221 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +212222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +122211 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +122222 
+222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222022 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +221221 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222122 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +122222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222212 +222222 +222222 +222222
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/pca/admix.ind Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,6 @@ +PB1 M All_Individuals +PB2 M All_Individuals +PB3 M All_Individuals +PB4 M All_Individuals +PB6 M All_Individuals +PB8 M All_Individuals
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/pca/admix.snp Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,303 @@ + snp1 11 0.002 2000 A T + snp3 11 0.002 2000 A T + snp4 11 0.002 2000 A T + snp5 11 0.002 2000 A T + snp6 11 0.002 2000 A T + snp7 11 0.002 2000 A T + snp8 11 0.002 2000 A T + snp9 11 0.002 2000 A T + snp10 11 0.002 2000 A T + snp11 11 0.002 2000 A T + snp12 11 0.002 2000 A T + snp13 11 0.002 2000 A T + snp14 11 0.002 2000 A T + snp16 11 0.002 2000 A T + snp17 11 0.002 2000 A T + snp22 11 0.002 2000 A T + snp24 11 0.002 2000 A T + snp25 11 0.002 2000 A T + snp27 11 0.002 2000 A T + snp28 11 0.002 2000 A T + snp29 11 0.002 2000 A T + snp30 11 0.002 2000 A T + snp31 11 0.002 2000 A T + snp33 11 0.002 2000 A T + snp34 11 0.002 2000 A T + snp37 11 0.002 2000 A T + snp38 11 0.002 2000 A T + snp39 11 0.002 2000 A T + snp40 11 0.002 2000 A T + snp41 11 0.002 2000 A T + snp42 11 0.002 2000 A T + snp43 11 0.002 2000 A T + snp45 11 0.002 2000 A T + snp46 11 0.002 2000 A T + snp47 11 0.002 2000 A T + snp48 11 0.002 2000 A T + snp49 11 0.002 2000 A T + snp50 11 0.002 2000 A T + snp51 11 0.002 2000 A T + snp52 11 0.002 2000 A T + snp53 11 0.002 2000 A T + snp54 11 0.002 2000 A T + snp56 11 0.002 2000 A T + snp58 11 0.002 2000 A T + snp59 11 0.002 2000 A T + snp60 11 0.002 2000 A T + snp61 11 0.002 2000 A T + snp62 11 0.002 2000 A T + snp63 11 0.002 2000 A T + snp64 11 0.002 2000 A T + snp65 11 0.002 2000 A T + snp67 11 0.002 2000 A T + snp68 11 0.002 2000 A T + snp70 11 0.002 2000 A T + snp71 11 0.002 2000 A T + snp72 11 0.002 2000 A T + snp73 11 0.002 2000 A T + snp74 11 0.002 2000 A T + snp75 11 0.002 2000 A T + snp76 11 0.002 2000 A T + snp77 11 0.002 2000 A T + snp78 11 0.002 2000 A T + snp80 11 0.002 2000 A T + snp81 11 0.002 2000 A T + snp83 11 0.002 2000 A T + snp84 11 0.002 2000 A T + snp87 11 0.002 2000 A T + snp89 11 0.002 2000 A T + snp90 11 0.002 2000 A T + snp91 11 0.002 2000 A T + snp92 11 0.002 2000 A T + snp93 11 0.002 2000 A T + 
snp94 11 0.002 2000 A T + snp98 11 0.002 2000 A T + snp100 11 0.002 2000 A T + snp101 11 0.002 2000 A T + snp102 11 0.002 2000 A T + snp103 11 0.002 2000 A T + snp104 11 0.002 2000 A T + snp105 11 0.002 2000 A T + snp106 11 0.002 2000 A T + snp107 11 0.002 2000 A T + snp108 11 0.002 2000 A T + snp110 11 0.002 2000 A T + snp111 11 0.002 2000 A T + snp112 11 0.002 2000 A T + snp113 11 0.002 2000 A T + snp116 11 0.002 2000 A T + snp117 11 0.002 2000 A T + snp118 11 0.002 2000 A T + snp119 11 0.002 2000 A T + snp121 11 0.002 2000 A T + snp122 11 0.002 2000 A T + snp123 11 0.002 2000 A T + snp124 11 0.002 2000 A T + snp125 11 0.002 2000 A T + snp126 11 0.002 2000 A T + snp128 11 0.002 2000 A T + snp129 11 0.002 2000 A T + snp131 11 0.002 2000 A T + snp133 11 0.002 2000 A T + snp134 11 0.002 2000 A T + snp135 11 0.002 2000 A T + snp137 11 0.002 2000 A T + snp138 11 0.002 2000 A T + snp139 11 0.002 2000 A T + snp140 11 0.002 2000 A T + snp141 11 0.002 2000 A T + snp143 11 0.002 2000 A T + snp145 11 0.002 2000 A T + snp146 11 0.002 2000 A T + snp148 11 0.002 2000 A T + snp149 11 0.002 2000 A T + snp150 11 0.002 2000 A T + snp151 11 0.002 2000 A T + snp152 11 0.002 2000 A T + snp153 11 0.002 2000 A T + snp154 11 0.002 2000 A T + snp156 11 0.002 2000 A T + snp157 11 0.002 2000 A T + snp158 11 0.002 2000 A T + snp159 11 0.002 2000 A T + snp160 11 0.002 2000 A T + snp161 11 0.002 2000 A T + snp162 11 0.002 2000 A T + snp164 11 0.002 2000 A T + snp165 11 0.002 2000 A T + snp167 11 0.002 2000 A T + snp168 11 0.002 2000 A T + snp169 11 0.002 2000 A T + snp170 11 0.002 2000 A T + snp171 11 0.002 2000 A T + snp172 11 0.002 2000 A T + snp174 11 0.002 2000 A T + snp175 11 0.002 2000 A T + snp176 11 0.002 2000 A T + snp177 11 0.002 2000 A T + snp178 11 0.002 2000 A T + snp179 11 0.002 2000 A T + snp181 11 0.002 2000 A T + snp182 11 0.002 2000 A T + snp183 11 0.002 2000 A T + snp184 11 0.002 2000 A T + snp185 11 0.002 2000 A T + snp186 11 0.002 2000 A T + snp188 11 0.002 2000 A T + 
snp191 11 0.002 2000 A T + snp192 11 0.002 2000 A T + snp193 11 0.002 2000 A T + snp195 11 0.002 2000 A T + snp196 11 0.002 2000 A T + snp197 11 0.002 2000 A T + snp199 11 0.002 2000 A T + snp200 11 0.002 2000 A T + snp201 11 0.002 2000 A T + snp202 11 0.002 2000 A T + snp203 11 0.002 2000 A T + snp205 11 0.002 2000 A T + snp207 11 0.002 2000 A T + snp210 11 0.002 2000 A T + snp211 11 0.002 2000 A T + snp212 11 0.002 2000 A T + snp213 11 0.002 2000 A T + snp214 11 0.002 2000 A T + snp215 11 0.002 2000 A T + snp216 11 0.002 2000 A T + snp217 11 0.002 2000 A T + snp218 11 0.002 2000 A T + snp219 11 0.002 2000 A T + snp220 11 0.002 2000 A T + snp221 11 0.002 2000 A T + snp223 11 0.002 2000 A T + snp224 11 0.002 2000 A T + snp225 11 0.002 2000 A T + snp226 11 0.002 2000 A T + snp227 11 0.002 2000 A T + snp228 11 0.002 2000 A T + snp229 11 0.002 2000 A T + snp230 11 0.002 2000 A T + snp231 11 0.002 2000 A T + snp232 11 0.002 2000 A T + snp235 11 0.002 2000 A T + snp236 11 0.002 2000 A T + snp237 11 0.002 2000 A T + snp239 11 0.002 2000 A T + snp240 11 0.002 2000 A T + snp241 11 0.002 2000 A T + snp242 11 0.002 2000 A T + snp243 11 0.002 2000 A T + snp244 11 0.002 2000 A T + snp246 11 0.002 2000 A T + snp247 11 0.002 2000 A T + snp248 11 0.002 2000 A T + snp249 11 0.002 2000 A T + snp250 11 0.002 2000 A T + snp251 11 0.002 2000 A T + snp252 11 0.002 2000 A T + snp253 11 0.002 2000 A T + snp254 11 0.002 2000 A T + snp255 11 0.002 2000 A T + snp256 11 0.002 2000 A T + snp257 11 0.002 2000 A T + snp258 11 0.002 2000 A T + snp260 11 0.002 2000 A T + snp261 11 0.002 2000 A T + snp262 11 0.002 2000 A T + snp263 11 0.002 2000 A T + snp264 11 0.002 2000 A T + snp265 11 0.002 2000 A T + snp266 11 0.002 2000 A T + snp267 11 0.002 2000 A T + snp268 11 0.002 2000 A T + snp269 11 0.002 2000 A T + snp270 11 0.002 2000 A T + snp271 11 0.002 2000 A T + snp273 11 0.002 2000 A T + snp274 11 0.002 2000 A T + snp275 11 0.002 2000 A T + snp276 11 0.002 2000 A T + snp277 11 0.002 2000 A T + 
snp278 11 0.002 2000 A T + snp281 11 0.002 2000 A T + snp282 11 0.002 2000 A T + snp284 11 0.002 2000 A T + snp287 11 0.002 2000 A T + snp288 11 0.002 2000 A T + snp289 11 0.002 2000 A T + snp290 11 0.002 2000 A T + snp291 11 0.002 2000 A T + snp292 11 0.002 2000 A T + snp293 11 0.002 2000 A T + snp294 11 0.002 2000 A T + snp297 11 0.002 2000 A T + snp298 11 0.002 2000 A T + snp299 11 0.002 2000 A T + snp300 11 0.002 2000 A T + snp301 11 0.002 2000 A T + snp302 11 0.002 2000 A T + snp303 11 0.002 2000 A T + snp304 11 0.002 2000 A T + snp307 11 0.002 2000 A T + snp308 11 0.002 2000 A T + snp309 11 0.002 2000 A T + snp310 11 0.002 2000 A T + snp312 11 0.002 2000 A T + snp313 11 0.002 2000 A T + snp316 11 0.002 2000 A T + snp317 11 0.002 2000 A T + snp320 11 0.002 2000 A T + snp321 11 0.002 2000 A T + snp322 11 0.002 2000 A T + snp323 11 0.002 2000 A T + snp324 11 0.002 2000 A T + snp325 11 0.002 2000 A T + snp328 11 0.002 2000 A T + snp329 11 0.002 2000 A T + snp331 11 0.002 2000 A T + snp332 11 0.002 2000 A T + snp333 11 0.002 2000 A T + snp334 11 0.002 2000 A T + snp335 11 0.002 2000 A T + snp336 11 0.002 2000 A T + snp338 11 0.002 2000 A T + snp339 11 0.002 2000 A T + snp341 11 0.002 2000 A T + snp342 11 0.002 2000 A T + snp344 11 0.002 2000 A T + snp345 11 0.002 2000 A T + snp348 11 0.002 2000 A T + snp350 11 0.002 2000 A T + snp352 11 0.002 2000 A T + snp353 11 0.002 2000 A T + snp354 11 0.002 2000 A T + snp355 11 0.002 2000 A T + snp360 11 0.002 2000 A T + snp361 11 0.002 2000 A T + snp362 11 0.002 2000 A T + snp364 11 0.002 2000 A T + snp366 11 0.002 2000 A T + snp369 11 0.002 2000 A T + snp370 11 0.002 2000 A T + snp371 11 0.002 2000 A T + snp372 11 0.002 2000 A T + snp373 11 0.002 2000 A T + snp374 11 0.002 2000 A T + snp375 11 0.002 2000 A T + snp376 11 0.002 2000 A T + snp377 11 0.002 2000 A T + snp378 11 0.002 2000 A T + snp379 11 0.002 2000 A T + snp380 11 0.002 2000 A T + snp381 11 0.002 2000 A T + snp382 11 0.002 2000 A T + snp383 11 0.002 2000 A T + 
snp384 11 0.002 2000 A T + snp385 11 0.002 2000 A T + snp386 11 0.002 2000 A T + snp389 11 0.002 2000 A T + snp390 11 0.002 2000 A T + snp393 11 0.002 2000 A T + snp395 11 0.002 2000 A T + snp397 11 0.002 2000 A T + snp400 11 0.002 2000 A T
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/pca/coordinates.txt Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,7 @@ + #eigvals: 3.243 1.103 + PB1 0.1887 0.4703 All_Individuals + PB2 0.0398 0.0455 All_Individuals + PB3 0.1647 -0.6945 All_Individuals + PB4 -0.8954 -0.0220 All_Individuals + PB6 0.1887 0.4703 All_Individuals + PB8 0.3135 -0.2696 All_Individuals
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/pca/explained.txt Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,4 @@ +Percentage explained by eigenvectors: +1: 64.9% +2: 22.1% +3: 13.1%
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/pca/par.admix Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,7 @@ +genotypename: /scratch/galaxy/home/oocyte/galaxy_oocyte/database/files/000/dataset_260_files/admix.geno +snpname: /scratch/galaxy/home/oocyte/galaxy_oocyte/database/files/000/dataset_260_files/admix.snp +indivname: /scratch/galaxy/home/oocyte/galaxy_oocyte/database/files/000/dataset_260_files/admix.ind +evecoutname: /scratch/galaxy/home/oocyte/galaxy_oocyte/database/files/000/dataset_260_files/coordinates.txt +evaloutname: /scratch/galaxy/home/oocyte/galaxy_oocyte/database/files/000/dataset_260_files/admix.eval +altnormstyle: NO +numoutevec: 2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/pca/pca.html Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,37 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> + <head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8" /> + <link rel="stylesheet" href="/static/style/base.css" type="text/css" /> + <title>PCA Galaxy Composite Dataset</title> + </head> + <body> + <div class="document"> + Output completed: 2012-04-03 02:19:05 PM + <p/> + <div id="gd_outputs"> + Outputs + <ul> + <li><a href="PCA.pdf">PCA.pdf</a></li> + <li><a href="coordinates.txt">coordinates.txt</a></li> + <li><a href="explained.txt">explained.txt</a></li> + </ul> + </div> + <div id="gd_inputs"> + Inputs + <ul> + <li><a href="par.admix">par.admix</a></li> + <li><a href="admix.geno">admix.geno</a></li> + <li><a href="admix.snp">admix.snp</a></li> + <li><a href="admix.ind">admix.ind</a></li> + </ul> + </div> + <div id="gd_misc"> + Stats<p/><pre> + +</pre> + </div> + </div> + </body> +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/phylogenetic_tree/distance_matrix.phylip Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,8 @@ +7 + canFam2 0.0000 0.3205 0.3085 0.3193 0.3101 0.3138 0.3170 + PB1 0.3205 0.0000 0.0103 0.0100 0.0130 0.0119 0.0112 + PB2 0.3085 0.0103 0.0000 0.0033 0.0062 0.0094 0.0062 + PB3 0.3193 0.0100 0.0033 0.0000 0.0081 0.0091 0.0054 + PB4 0.3101 0.0130 0.0062 0.0081 0.0000 0.0099 0.0088 + PB6 0.3138 0.0119 0.0094 0.0091 0.0099 0.0000 0.0079 + PB8 0.3170 0.0112 0.0062 0.0054 0.0088 0.0079 0.0000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/phylogenetic_tree/informative_snps.txt Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,7 @@ + canFam2 0 338 339 350 345 342 344 + PB1 338 0 338 344 338 336 339 + PB2 339 338 0 345 338 339 338 + PB3 350 344 345 0 347 342 347 + PB4 345 338 338 347 0 337 341 + PB6 342 336 339 342 337 0 343 + PB8 344 339 338 347 341 343 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/phylogenetic_tree/mega_distance_matrix.txt Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,27 @@ +#mega +!Title: Galaxy; +!Format DataType=Distance DataFormat=LowerLeft NTaxa=7; + +[1] #canFam2 +[2] #PB1 +[3] #PB2 +[4] #PB3 +[5] #PB4 +[6] #PB6 +[7] #PB8 + + + +[ 1 2 3 4 5 6 7 ] +[1] +[2] 0.3205 +[3] 0.3085 0.0103 +[4] 0.3193 0.0100 0.0033 +[5] 0.3101 0.0130 0.0062 0.0081 +[6] 0.3138 0.0119 0.0094 0.0091 0.0099 +[7] 0.3170 0.0112 0.0062 0.0054 0.0088 0.0079 + + + + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/phylogenetic_tree/phylogenetic_tree.html Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,49 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> + <head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8" /> + <link rel="stylesheet" href="/static/style/base.css" type="text/css" /> + <title>Phylogenetic tree Galaxy Composite Dataset</title> + </head> + <body> + <div class="document"> + Output completed: 2012-04-03 01:57:44 PM + <p/> + <div id="gd_outputs"> + Outputs + <ul> + <li><a href="tree.pdf">tree.pdf</a></li> + <li><a href="phylogenetic_tree.newick">phylogenetic tree (newick)</a></li> + <li><a href="distance_matrix.phylip">Phylip distance matrix</a></li> + <li><a href="mega_distance_matrix.txt">Mega distance matrix</a></li> + <li><a href="informative_snps.txt">informative SNPs</a></li> + </ul> + </div> + <div id="gd_inputs"> + Inputs + <ul> + <li>Minimum coverage: 3</li> + <li>Minimum quality: 30</li> + <li>Include reference sequence: yes</li> + <li>Data source: sequence coverage</li> + <li>Branch type: square</li> + <li>Draw branches to scale: yes</li> + <li>Show branch lengths: yes</li> + <li>Tree layout: horizontal</li> + </ul> + </div> + <div id="gd_misc"> + Individuals +<ol> +<li>PB1</li> +<li>PB2</li> +<li>PB3</li> +<li>PB4</li> +<li>PB6</li> +<li>PB8</li> +</ol> + </div> + </div> + </body> +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/phylogenetic_tree/phylogenetic_tree.newick Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,16 @@ +( +( +( +PB4:0.00174, +canFam2:0.30836) +:0.00188, +PB2:0.00042) +:0.00210, +( +PB6:0.00470, +PB1:0.00720) +:0.00035, +( +PB8:0.00288, +PB3:0.00252) +:0.00055);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/population_structure/graphical.pdf Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,147 @@ +%PDF-1.4 +%âãÏÓ\r +1 0 obj +<< +/CreationDate (D:20120403142055) +/ModDate (D:20120403142055) +/Title (R Graphics Output) +/Producer (R 2.11.0) +/Creator (R) +>> +endobj +2 0 obj +<< +/Type /Catalog +/Pages 3 0 R +>> +endobj +5 0 obj +<< +/Type /Page +/Parent 3 0 R +/Contents 6 0 R +/Resources 4 0 R +>> +endobj +6 0 obj +<< +/Length 7 0 R +>> +stream +1 J 1 j q +Q q +1.000 0.000 0.000 rg +74.40 74.27 54.86 0.00 re f +0.000 1.000 1.000 rg +74.40 74.27 54.86 82.69 re f +1.000 0.000 0.000 rg +140.23 74.27 54.86 82.69 re f +0.000 1.000 1.000 rg +140.23 156.96 54.86 0.00 re f +1.000 0.000 0.000 rg +206.06 74.27 54.86 82.69 re f +0.000 1.000 1.000 rg +206.06 156.96 54.86 0.00 re f +1.000 0.000 0.000 rg +271.89 74.27 54.86 0.00 re f +0.000 1.000 1.000 rg +271.89 74.27 54.86 82.69 re f +1.000 0.000 0.000 rg +337.71 74.27 54.86 82.69 re f +0.000 1.000 1.000 rg +337.71 156.96 54.86 0.00 re f +1.000 0.000 0.000 rg +403.54 74.27 54.86 82.69 re f +0.000 1.000 1.000 rg +403.54 156.96 54.86 0.00 re f +BT +0.000 0.000 0.000 rg +/F2 1 Tf 12.00 0.00 -0.00 12.00 236.05 18.72 Tm (Individual #) Tj +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 12.96 91.68 Tm [(Ancestr) -30 (y)] TJ +ET +Q q +0.000 0.000 0.000 RG +0.75 w +[] 0 d +1 J +1 j +10.00 M +59.04 74.27 m 59.04 156.96 l S +59.04 74.27 m 51.84 74.27 l S +59.04 90.81 m 51.84 90.81 l S +59.04 107.34 m 51.84 107.34 l S +59.04 123.88 m 51.84 123.88 l S +59.04 140.42 m 51.84 140.42 l S +59.04 156.96 m 51.84 156.96 l S +BT +0.000 0.000 0.000 rg +/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 65.93 Tm (0.0) Tj +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 99.00 Tm (0.4) Tj +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 132.08 Tm (0.8) Tj +ET +Q +endstream +endobj +7 0 obj +1275 +endobj +3 0 obj +<< +/Type /Pages +/Kids [ +5 0 R +] +/Count 1 +/MediaBox [0 0 504 216] +>> +endobj +4 0 obj +<< +/ProcSet 
[/PDF /Text] +/Font <</F2 9 0 R >> +/ExtGState << >> +>> +endobj +8 0 obj +<< +/Type /Encoding +/BaseEncoding /WinAnsiEncoding +/Differences [ 45/minus 96/quoteleft +144/dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent +/dieresis /.notdef /ring /cedilla /.notdef /hungarumlaut /ogonek /caron /space] +>> +endobj +9 0 obj << +/Type /Font +/Subtype /Type1 +/Name /F2 +/BaseFont /Helvetica +/Encoding 8 0 R +>> endobj +xref +0 10 +0000000000 65535 f +0000000021 00000 n +0000000164 00000 n +0000001641 00000 n +0000001724 00000 n +0000000213 00000 n +0000000293 00000 n +0000001621 00000 n +0000001805 00000 n +0000002062 00000 n +trailer +<< +/Size 10 +/Info 1 0 R +/Root 2 0 R +>> +startxref +2158 +%%EOF
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/population_structure/numeric.txt Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,6 @@ +0.000010 0.999990 +0.999990 0.000010 +0.999990 0.000010 +0.000010 0.999990 +0.999990 0.000010 +0.999990 0.000010
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/population_structure/population_structure.html Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,44 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> + <head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8" /> + <link rel="stylesheet" href="/static/style/base.css" type="text/css" /> + <title>Population structure Galaxy Composite Dataset</title> + </head> + <body> + <div class="document"> + Output completed: 2012-04-03 02:20:55 PM + <p/> + <div id="gd_outputs"> + Outputs + <ul> + <li><a href="graphical.pdf">graphical.pdf</a></li> + <li><a href="numeric.txt">numeric.txt</a></li> + </ul> + </div> + <div id="gd_inputs"> + Inputs + <ul> + <li>Number of populations: 2</li> + </ul> + </div> + <div id="gd_misc"> + Populations +<ul> +<li> +All Individuals +<ol> +<li>PB1</li> +<li>PB2</li> +<li>PB3</li> +<li>PB4</li> +<li>PB6</li> +<li>PB8</li> +</ol> +</li> +</ul> + </div> + </div> + </body> +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/prepare_population_structure/admix.map Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,303 @@ +1 snp1 0 2 +1 snp3 0 4 +1 snp4 0 5 +1 snp5 0 6 +1 snp6 0 7 +1 snp7 0 8 +1 snp8 0 9 +1 snp9 0 10 +1 snp10 0 11 +1 snp11 0 12 +1 snp12 0 13 +1 snp13 0 14 +1 snp14 0 15 +1 snp16 0 17 +1 snp17 0 18 +1 snp22 0 23 +1 snp24 0 25 +1 snp25 0 26 +1 snp27 0 28 +1 snp28 0 29 +1 snp29 0 30 +1 snp30 0 31 +1 snp31 0 32 +1 snp33 0 34 +1 snp34 0 35 +1 snp37 0 38 +1 snp38 0 39 +1 snp39 0 40 +1 snp40 0 41 +1 snp41 0 42 +1 snp42 0 43 +1 snp43 0 44 +1 snp45 0 46 +1 snp46 0 47 +1 snp47 0 48 +1 snp48 0 49 +1 snp49 0 50 +1 snp50 0 51 +1 snp51 0 52 +1 snp52 0 53 +1 snp53 0 54 +1 snp54 0 55 +1 snp56 0 57 +1 snp58 0 59 +1 snp59 0 60 +1 snp60 0 61 +1 snp61 0 62 +1 snp62 0 63 +1 snp63 0 64 +1 snp64 0 65 +1 snp65 0 66 +1 snp67 0 68 +1 snp68 0 69 +1 snp70 0 71 +1 snp71 0 72 +1 snp72 0 73 +1 snp73 0 74 +1 snp74 0 75 +1 snp75 0 76 +1 snp76 0 77 +1 snp77 0 78 +1 snp78 0 79 +1 snp80 0 81 +1 snp81 0 82 +1 snp83 0 84 +1 snp84 0 85 +1 snp87 0 88 +1 snp89 0 90 +1 snp90 0 91 +1 snp91 0 92 +1 snp92 0 93 +1 snp93 0 94 +1 snp94 0 95 +1 snp98 0 99 +1 snp100 0 101 +1 snp101 0 102 +1 snp102 0 103 +1 snp103 0 104 +1 snp104 0 105 +1 snp105 0 106 +1 snp106 0 107 +1 snp107 0 108 +1 snp108 0 109 +1 snp110 0 111 +1 snp111 0 112 +1 snp112 0 113 +1 snp113 0 114 +1 snp116 0 117 +1 snp117 0 118 +1 snp118 0 119 +1 snp119 0 120 +1 snp121 0 122 +1 snp122 0 123 +1 snp123 0 124 +1 snp124 0 125 +1 snp125 0 126 +1 snp126 0 127 +1 snp128 0 129 +1 snp129 0 130 +1 snp131 0 132 +1 snp133 0 134 +1 snp134 0 135 +1 snp135 0 136 +1 snp137 0 138 +1 snp138 0 139 +1 snp139 0 140 +1 snp140 0 141 +1 snp141 0 142 +1 snp143 0 144 +1 snp145 0 146 +1 snp146 0 147 +1 snp148 0 149 +1 snp149 0 150 +1 snp150 0 151 +1 snp151 0 152 +1 snp152 0 153 +1 snp153 0 154 +1 snp154 0 155 +1 snp156 0 157 +1 snp157 0 158 +1 snp158 0 159 +1 snp159 0 160 +1 snp160 0 161 +1 snp161 0 162 +1 snp162 0 163 +1 
snp164 0 165 +1 snp165 0 166 +1 snp167 0 168 +1 snp168 0 169 +1 snp169 0 170 +1 snp170 0 171 +1 snp171 0 172 +1 snp172 0 173 +1 snp174 0 175 +1 snp175 0 176 +1 snp176 0 177 +1 snp177 0 178 +1 snp178 0 179 +1 snp179 0 180 +1 snp181 0 182 +1 snp182 0 183 +1 snp183 0 184 +1 snp184 0 185 +1 snp185 0 186 +1 snp186 0 187 +1 snp188 0 189 +1 snp191 0 192 +1 snp192 0 193 +1 snp193 0 194 +1 snp195 0 196 +1 snp196 0 197 +1 snp197 0 198 +1 snp199 0 200 +1 snp200 0 201 +1 snp201 0 202 +1 snp202 0 203 +1 snp203 0 204 +1 snp205 0 206 +1 snp207 0 208 +1 snp210 0 211 +1 snp211 0 212 +1 snp212 0 213 +1 snp213 0 214 +1 snp214 0 215 +1 snp215 0 216 +1 snp216 0 217 +1 snp217 0 218 +1 snp218 0 219 +1 snp219 0 220 +1 snp220 0 221 +1 snp221 0 222 +1 snp223 0 224 +1 snp224 0 225 +1 snp225 0 226 +1 snp226 0 227 +1 snp227 0 228 +1 snp228 0 229 +1 snp229 0 230 +1 snp230 0 231 +1 snp231 0 232 +1 snp232 0 233 +1 snp235 0 236 +1 snp236 0 237 +1 snp237 0 238 +1 snp239 0 240 +1 snp240 0 241 +1 snp241 0 242 +1 snp242 0 243 +1 snp243 0 244 +1 snp244 0 245 +1 snp246 0 247 +1 snp247 0 248 +1 snp248 0 249 +1 snp249 0 250 +1 snp250 0 251 +1 snp251 0 252 +1 snp252 0 253 +1 snp253 0 254 +1 snp254 0 255 +1 snp255 0 256 +1 snp256 0 257 +1 snp257 0 258 +1 snp258 0 259 +1 snp260 0 261 +1 snp261 0 262 +1 snp262 0 263 +1 snp263 0 264 +1 snp264 0 265 +1 snp265 0 266 +1 snp266 0 267 +1 snp267 0 268 +1 snp268 0 269 +1 snp269 0 270 +1 snp270 0 271 +1 snp271 0 272 +1 snp273 0 274 +1 snp274 0 275 +1 snp275 0 276 +1 snp276 0 277 +1 snp277 0 278 +1 snp278 0 279 +1 snp281 0 282 +1 snp282 0 283 +1 snp284 0 285 +1 snp287 0 288 +1 snp288 0 289 +1 snp289 0 290 +1 snp290 0 291 +1 snp291 0 292 +1 snp292 0 293 +1 snp293 0 294 +1 snp294 0 295 +1 snp297 0 298 +1 snp298 0 299 +1 snp299 0 300 +1 snp300 0 301 +1 snp301 0 302 +1 snp302 0 303 +1 snp303 0 304 +1 snp304 0 305 +1 snp307 0 308 +1 snp308 0 309 +1 snp309 0 310 +1 snp310 0 311 +1 snp312 0 313 +1 snp313 0 314 +1 snp316 0 317 +1 snp317 0 318 +1 snp320 0 321 +1 snp321 0 322 +1 
snp322 0 323 +1 snp323 0 324 +1 snp324 0 325 +1 snp325 0 326 +1 snp328 0 329 +1 snp329 0 330 +1 snp331 0 332 +1 snp332 0 333 +1 snp333 0 334 +1 snp334 0 335 +1 snp335 0 336 +1 snp336 0 337 +1 snp338 0 339 +1 snp339 0 340 +1 snp341 0 342 +1 snp342 0 343 +1 snp344 0 345 +1 snp345 0 346 +1 snp348 0 349 +1 snp350 0 351 +1 snp352 0 353 +1 snp353 0 354 +1 snp354 0 355 +1 snp355 0 356 +1 snp360 0 361 +1 snp361 0 362 +1 snp362 0 363 +1 snp364 0 365 +1 snp366 0 367 +1 snp369 0 370 +1 snp370 0 371 +1 snp371 0 372 +1 snp372 0 373 +1 snp373 0 374 +1 snp374 0 375 +1 snp375 0 376 +1 snp376 0 377 +1 snp377 0 378 +1 snp378 0 379 +1 snp379 0 380 +1 snp380 0 381 +1 snp381 0 382 +1 snp382 0 383 +1 snp383 0 384 +1 snp384 0 385 +1 snp385 0 386 +1 snp386 0 387 +1 snp389 0 390 +1 snp390 0 391 +1 snp393 0 394 +1 snp395 0 396 +1 snp397 0 398 +1 snp400 0 401
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/prepare_population_structure/admix.ped Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,6 @@ +PB1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +PB2 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +PB3 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +PB4 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +PB6 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 +PB8 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/prepare_population_structure/prepare_population_structure.html Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,47 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> + <head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8" /> + <link rel="stylesheet" href="/static/style/base.css" type="text/css" /> + <title>Prepare to look for population structure Galaxy Composite Dataset</title> + </head> + <body> + <div class="document"> + Output completed: 2012-04-03 02:17:44 PM + <p/> + <div id="gd_outputs"> + Outputs + <ul> + <li><a href="admix.ped">admix.ped</a></li> + <li><a href="admix.map">admix.map</a></li> + <li>Using 303 of 400 SNPs</li> + </ul> + </div> + <div id="gd_inputs"> + Inputs + <ul> + <li>Minimum reads covering a SNP, per individual: 3</li> + <li>Minimum quality value, per individual: 30</li> + <li>Minimum spacing between SNPs on the same scaffold: 0</li> + </ul> + </div> + <div id="gd_misc"> + Populations +<ul> +<li> +All Individuals +<ol> +<li>PB1</li> +<li>PB2</li> +<li>PB3</li> +<li>PB4</li> +<li>PB6</li> +<li>PB8</li> +</ol> +</li> +</ul> + </div> + </div> + </body> +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/rank_pathways/rank_pathways.tabular Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,240 @@ +3 0.25 1 cfa03450=Non-homologous end-joining +1 0.25 1 cfa00750=Vitamin B6 metabolism +2 0.2 3 cfa00290=Valine, leucine and isoleucine biosynthesis +3 0.18 4 cfa00770=Pantothenate and CoA biosynthesis +5 0.17 5 cfa05310=Asthma +4 0.16 6 cfa00760=Nicotinate and nicotinamide metabolism +2 0.12 7 cfa00450=Selenocompound metabolism +4 0.11 8 cfa05330=Allograft rejection +5 0.098 9 cfa04672=Intestinal immune network for IgA production +4 0.098 9 cfa02010=ABC transporters +2 0.095 11 cfa03430=Mismatch repair +4 0.089 12 cfa05320=Autoimmune thyroid disease +4 0.089 12 cfa00280=Valine, leucine and isoleucine degradation +3 0.088 14 cfa03410=Base excision repair +3 0.088 14 cfa03030=DNA replication +3 0.088 14 cfa00565=Ether lipid metabolism +6 0.087 17 cfa05140=Leishmaniasis +2 0.087 17 cfa04977=Vitamin digestion and absorption +1 0.083 19 cfa00740=Riboflavin metabolism +4 0.08 20 cfa05150=Staphylococcus aureus infection +2 0.08 20 cfa03060=Protein export +3 0.079 22 cfa05340=Primary immunodeficiency +3 0.079 22 cfa05143=African trypanosomiasis +6 0.078 24 cfa00564=Glycerophospholipid metabolism +2 0.077 25 cfa00410=beta-Alanine metabolism +2 0.071 26 cfa05332=Graft-versus-host disease +5 0.069 27 cfa03320=PPAR signaling pathway +6 0.066 28 cfa05323=Rheumatoid arthritis +5 0.063 29 cfa04664=Fc epsilon RI signaling pathway +3 0.062 30 cfa00561=Glycerolipid metabolism +2 0.062 30 cfa00350=Tyrosine metabolism +2 0.062 30 cfa00020=Citrate cycle (TCA cycle) +2 0.059 33 cfa00260=Glycine, serine and threonine metabolism +1 0.059 33 cfa04614=Renin-angiotensin system +1 0.059 33 cfa00360=Phenylalanine metabolism +9 0.058 36 cfa04145=Phagosome +3 0.058 36 cfa05213=Endometrial cancer +4 0.057 38 cfa05416=Viral myocarditis +2 0.057 38 cfa00500=Starch and sucrose metabolism +2 0.056 40 cfa04130=SNARE interactions in vesicular 
transport +1 0.056 40 cfa00592=alpha-Linolenic acid metabolism +1 0.053 42 cfa04964=Proximal tubule bicarbonate reclamation +1 0.053 42 cfa00630=Glyoxylate and dicarboxylate metabolism +3 0.052 44 cfa04621=NOD-like receptor signaling pathway +2 0.05 45 cfa05219=Bladder cancer +2 0.05 45 cfa04940=Type I diabetes mellitus +2 0.05 45 cfa00380=Tryptophan metabolism +2 0.047 48 cfa03420=Nucleotide excision repair +3 0.045 49 cfa04920=Adipocytokine signaling pathway +3 0.045 49 cfa00970=Aminoacyl-tRNA biosynthesis +2 0.045 49 cfa00071=Fatty acid metabolism +1 0.045 49 cfa00591=Linoleic acid metabolism +1 0.045 49 cfa00340=Histidine metabolism +4 0.043 54 cfa04972=Pancreatic secretion +2 0.043 54 cfa03022=Basal transcription factors +2 0.043 54 cfa00982=Drug metabolism - cytochrome P450 +3 0.042 57 cfa05218=Melanoma +3 0.042 57 cfa05211=Renal cell carcinoma +4 0.041 59 cfa05414=Dilated cardiomyopathy +2 0.04 60 cfa00590=Arachidonic acid metabolism +1 0.04 60 cfa04320=Dorso-ventral axis formation +3 0.039 62 cfa04662=B cell receptor signaling pathway +2 0.039 62 cfa00310=Lysine degradation +3 0.038 64 cfa04512=ECM-receptor interaction +2 0.038 64 cfa05144=Malaria +2 0.038 64 cfa00270=Cysteine and methionine metabolism +1 0.038 64 cfa03440=Homologous recombination +1 0.038 64 cfa00052=Galactose metabolism +8 0.037 69 cfa04810=Regulation of actin cytoskeleton +4 0.037 69 cfa05146=Amoebiasis +4 0.037 69 cfa04666=Fc gamma R-mediated phagocytosis +2 0.037 69 cfa05223=Non-small cell lung cancer +6 0.036 73 cfa05168=Herpes simplex infection +6 0.036 73 cfa05152=Tuberculosis +3 0.036 73 cfa04640=Hematopoietic cell lineage +7 0.034 76 cfa04510=Focal adhesion +3 0.034 76 cfa00240=Pyrimidine metabolism +3 0.033 78 cfa03008=Ribosome biogenesis in eukaryotes +1 0.033 78 cfa00983=Drug metabolism - other enzymes +2 0.032 80 cfa04976=Bile secretion +6 0.031 81 cfa04060=Cytokine-cytokine receptor interaction +4 0.031 81 cfa04110=Cell cycle +1 0.031 81 cfa00250=Alanine, aspartate and 
glutamate metabolism +4 0.029 84 cfa05145=Toxoplasmosis +3 0.029 84 cfa04650=Natural killer cell mediated cytotoxicity +2 0.029 84 cfa05214=Glioma +4 0.028 87 cfa05162=Measles +2 0.028 87 cfa05412=Arrhythmogenic right ventricular cardiomyopathy (ARVC) +7 0.027 89 cfa05166=HTLV-I infection +4 0.027 89 cfa05322=Systemic lupus erythematosus +2 0.027 89 cfa05212=Pancreatic cancer +2 0.026 92 cfa04146=Peroxisome +2 0.026 92 cfa04070=Phosphatidylinositol signaling system +1 0.026 92 cfa04978=Mineral absorption +2 0.025 95 cfa05133=Pertussis +2 0.025 95 cfa04612=Antigen processing and presentation +2 0.025 95 cfa04350=TGF-beta signaling pathway +1 0.025 95 cfa00830=Retinol metabolism +3 0.024 99 cfa04514=Cell adhesion molecules (CAMs) +2 0.024 99 cfa05410=Hypertrophic cardiomyopathy (HCM) +2 0.024 99 cfa04012=ErbB signaling pathway +1 0.024 99 cfa00980=Metabolism of xenobiotics by cytochrome P450 +1 0.024 99 cfa00640=Propanoate metabolism +3 0.023 104 cfa04360=Axon guidance +2 0.023 104 cfa04620=Toll-like receptor signaling pathway +1 0.023 104 cfa04975=Fat digestion and absorption +1 0.023 104 cfa04330=Notch signaling pathway +7 0.022 108 cfa05200=Pathways in cancer +3 0.022 108 cfa04910=Insulin signaling pathway +2 0.022 108 cfa05215=Prostate cancer +1 0.022 108 cfa03460=Fanconi anemia pathway +24 0.021 112 cfa01100=Metabolic pathways +3 0.021 112 cfa04630=Jak-STAT signaling pathway +1 0.021 112 cfa00480=Glutathione metabolism +3 0.020 115 cfa00230=Purine metabolism +2 0.020 115 cfa04540=Gap junction +1 0.02 115 cfa00620=Pyruvate metabolism +2 0.019 118 cfa04912=GnRH signaling pathway +2 0.018 119 cfa05142=Chagas disease (American trypanosomiasis) +2 0.018 119 cfa04380=Osteoclast differentiation +1 0.018 119 cfa05221=Acute myeloid leukemia +1 0.018 119 cfa00330=Arginine and proline metabolism +3 0.017 123 cfa05164=Influenza A +2 0.017 123 cfa04270=Vascular smooth muscle contraction +2 0.017 123 cfa04114=Oocyte meiosis +3 0.016 126 cfa04141=Protein processing in 
endoplasmic reticulum +3 0.016 126 cfa04020=Calcium signaling pathway +2 0.016 126 cfa05160=Hepatitis C +2 0.016 126 cfa04670=Leukocyte transendothelial migration +1 0.016 126 cfa05210=Colorectal cancer +1 0.016 126 cfa04610=Complement and coagulation cascades +1 0.016 126 cfa04150=mTOR signaling pathway +4 0.015 133 cfa04010=MAPK signaling pathway +1 0.015 133 cfa04974=Protein digestion and absorption +1 0.015 133 cfa04730=Long-term depression +1 0.015 133 cfa04115=p53 signaling pathway +1 0.014 137 cfa05220=Chronic myeloid leukemia +1 0.014 137 cfa04971=Gastric acid secretion +1 0.014 137 cfa04720=Long-term potentiation +1 0.014 137 cfa04370=VEGF signaling pathway +1 0.014 137 cfa04260=Cardiac muscle contraction +1 0.014 137 cfa03018=RNA degradation +2 0.013 143 cfa00010=Glycolysis / Gluconeogenesis +1 0.013 143 cfa04970=Salivary secretion +1 0.013 143 cfa04520=Adherens junction +2 0.012 146 cfa04062=Chemokine signaling pathway +1 0.012 146 cfa05134=Legionellosis +1 0.012 146 cfa05132=Salmonella infection +1 0.012 146 cfa04727=GABAergic synapse +1 0.012 146 cfa04210=Apoptosis +1 0.011 151 cfa03015=mRNA surveillance pathway +1 0.010 152 cfa04914=Progesterone-mediated oocyte maturation +1 0.0098 153 cfa04916=Melanogenesis +2 0.0095 154 cfa04144=Endocytosis +1 0.0087 155 cfa04142=Lysosome +1 0.0086 156 cfa04660=T cell receptor signaling pathway +1 0.0082 157 cfa04724=Glutamatergic synapse +2 0.0081 158 cfa04080=Neuroactive ligand-receptor interaction +1 0.0079 159 cfa04728=Dopaminergic synapse +2 0.0074 160 cfa05010=Alzheimer's disease +1 0.0074 160 cfa04722=Neurotrophin signaling pathway +1 0.0074 160 cfa04120=Ubiquitin mediated proteolysis +1 0.0068 163 cfa00190=Oxidative phosphorylation +1 0.0067 164 cfa05012=Parkinson's disease +1 0.0057 165 cfa03013=RNA transport +1 0.0056 166 cfa03040=Spliceosome +1 0.0049 167 cfa05016=Huntington's disease +1 0.0023 168 cfa04740=Olfactory transduction +0 0 169 cfa05222=Small cell lung cancer +0 0 169 cfa05217=Basal cell 
carcinoma +0 0 169 cfa05216=Thyroid cancer +0 0 169 cfa05100=Bacterial invasion of epithelial cells +0 0 169 cfa05020=Prion diseases +0 0 169 cfa05014=Amyotrophic lateral sclerosis (ALS) +0 0 169 cfa04973=Carbohydrate digestion and absorption +0 0 169 cfa04966=Collecting duct acid secretion +0 0 169 cfa04962=Vasopressin-regulated water reabsorption +0 0 169 cfa04961=Endocrine and other factor-regulated calcium reabsorption +0 0 169 cfa04960=Aldosterone-regulated sodium reabsorption +0 0 169 cfa04950=Maturity onset diabetes of the young +0 0 169 cfa04930=Type II diabetes mellitus +0 0 169 cfa04744=Phototransduction +0 0 169 cfa04742=Taste transduction +0 0 169 cfa04725=Cholinergic synapse +0 0 169 cfa04721=Synaptic vesicle cycle +0 0 169 cfa04710=Circadian rhythm - mammal +0 0 169 cfa04623=Cytosolic DNA-sensing pathway +0 0 169 cfa04622=RIG-I-like receptor signaling pathway +0 0 169 cfa04530=Tight junction +0 0 169 cfa04340=Hedgehog signaling pathway +0 0 169 cfa04310=Wnt signaling pathway +0 0 169 cfa04140=Regulation of autophagy +0 0 169 cfa04122=Sulfur relay system +0 0 169 cfa03050=Proteasome +0 0 169 cfa03020=RNA polymerase +0 0 169 cfa03010=Ribosome +0 0 169 cfa01040=Biosynthesis of unsaturated fatty acids +0 0 169 cfa00920=Sulfur metabolism +0 0 169 cfa00910=Nitrogen metabolism +0 0 169 cfa00900=Terpenoid backbone biosynthesis +0 0 169 cfa00860=Porphyrin and chlorophyll metabolism +0 0 169 cfa00790=Folate biosynthesis +0 0 169 cfa00785=Lipoic acid metabolism +0 0 169 cfa00780=Biotin metabolism +0 0 169 cfa00730=Thiamine metabolism +0 0 169 cfa00670=One carbon pool by folate +0 0 169 cfa00650=Butanoate metabolism +0 0 169 cfa00604=Glycosphingolipid biosynthesis - ganglio series +0 0 169 cfa00603=Glycosphingolipid biosynthesis - globo series +0 0 169 cfa00601=Glycosphingolipid biosynthesis - lacto and neolacto series +0 0 169 cfa00600=Sphingolipid metabolism +0 0 169 cfa00563=Glycosylphosphatidylinositol(GPI)-anchor biosynthesis +0 0 169 cfa00562=Inositol 
phosphate metabolism +0 0 169 cfa00534=Glycosaminoglycan biosynthesis - heparan sulfate +0 0 169 cfa00533=Glycosaminoglycan biosynthesis - keratan sulfate +0 0 169 cfa00532=Glycosaminoglycan biosynthesis - chondroitin sulfate +0 0 169 cfa00531=Glycosaminoglycan degradation +0 0 169 cfa00520=Amino sugar and nucleotide sugar metabolism +0 0 169 cfa00514=Other types of O-glycan biosynthesis +0 0 169 cfa00512=Mucin type O-Glycan biosynthesis +0 0 169 cfa00511=Other glycan degradation +0 0 169 cfa00510=N-Glycan biosynthesis +0 0 169 cfa00472=D-Arginine and D-ornithine metabolism +0 0 169 cfa00471=D-Glutamine and D-glutamate metabolism +0 0 169 cfa00460=Cyanoamino acid metabolism +0 0 169 cfa00430=Taurine and hypotaurine metabolism +0 0 169 cfa00400=Phenylalanine, tyrosine and tryptophan biosynthesis +0 0 169 cfa00300=Lysine biosynthesis +0 0 169 cfa00232=Caffeine metabolism +0 0 169 cfa00140=Steroid hormone biosynthesis +0 0 169 cfa00130=Ubiquinone and other terpenoid-quinone biosynthesis +0 0 169 cfa00120=Primary bile acid biosynthesis +0 0 169 cfa00100=Steroid biosynthesis +0 0 169 cfa00072=Synthesis and degradation of ketone bodies +0 0 169 cfa00062=Fatty acid elongation in mitochondria +0 0 169 cfa00061=Fatty acid biosynthesis +0 0 169 cfa00053=Ascorbate and aldarate metabolism +0 0 169 cfa00051=Fructose and mannose metabolism +0 0 169 cfa00040=Pentose and glucuronate interconversions +0 0 169 cfa00030=Pentose phosphate pathway
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/select_snps/select_snps.wsf Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,102 @@ +#{"column_names":["scaf","pos","A","B","qual","ref","rpos","rnuc","1A","1B","1G","1Q","2A","2B","2G","2Q","3A","3B","3G","3Q","4A","4B","4G","4Q","5A","5B","5G","5Q","6A","6B","6G","6Q","pair","dist", +#"prim","rflp"],"dbkey":"canFam2","individuals":[["PB1",9],["PB2",13],["PB3",17],["PB4",21],["PB6",25],["PB8",29]],"pos":2,"rPos":7,"ref":6,"scaffold":1,"species":"bear"} +Contig161_chr1_4641264_4641879 115 C T 73.5 chr1 4641382 C 6 0 2 45 8 0 2 51 15 0 2 72 5 0 2 42 6 0 2 45 10 0 2 57 Y 54 0.323 0 +Contig86_chr1_30984450_30985684 670 C T 365.0 chr1 30985133 C 9 0 2 54 10 0 2 57 13 0 2 66 3 0 2 36 9 0 2 54 7 0 2 48 Y 145 0.031 0 +Contig21_chr1_60697952_60699446 307 G A 51.9 chr1 60698265 G 12 0 2 63 9 0 2 54 4 0 2 39 6 0 2 45 9 0 2 54 4 0 2 39 Y 98 0.507 0 +Contig64_chr1_87343284_87345672 163 T A 3.76 chr1 87343443 C 0 2 2 1 0 0 -1 0 5 0 2 42 2 0 2 33 0 1 2 14 0 0 -1 0 N 3 0.039 2 +Contig20_chr1_110679280_110679687 181 C T 87.4 chr1 110679454 - 1 0 2 30 7 0 2 48 4 0 2 39 2 0 2 33 2 0 2 33 0 0 -1 0 N 31 0.660 2 +Contig222_chr2_9817738_9818143 220 C T 888.0 chr2 9817960 C 17 0 2 78 12 0 2 63 20 0 2 87 8 0 2 51 11 0 2 60 12 0 2 63 Y 76 0.093 1 +Contig47_chr2_25470778_25471576 126 G A 888.0 chr2 25470896 G 12 0 2 63 14 0 2 69 14 0 2 69 10 0 2 57 18 0 2 81 13 0 2 66 N 11 0.289 1 +Contig6_chr2_56859179_56859956 671 T C 999.9 chr2 56859851 T 15 0 2 72 18 0 2 81 20 0 2 90 19 0 2 84 19 0 2 84 24 0 2 99 N 28 5.308 1 +Contig163_chr2_76402959_76404830 221 C T 127.0 chr2 76403181 C 4 0 2 42 10 0 2 57 9 0 2 54 11 0 2 60 7 0 2 48 9 0 2 54 Y 54 0.178 1 +Contig56_chr3_17326225_17327548 387 G C 91.2 chr3 17326591 G 14 0 2 69 13 0 2 66 15 0 2 72 15 0 2 72 13 0 2 66 12 0 2 63 Y 20 0.225 3 +Contig108_chr3_46210055_46210874 367 A G 21.0 chr3 46210423 A 19 0 2 84 10 0 2 57 16 0 2 75 14 0 2 69 20 0 2 87 11 0 2 60 N 236 0.028 1 
+Contig1_chr3_51588422_51589409 926 A G 51.0 chr3 51589353 G 2 0 2 33 2 0 2 33 6 0 2 45 4 0 2 39 9 0 2 54 11 0 2 60 N 21 1.147 0 +Contig65_chr3_80727952_80728283 39 T C 71.2 chr3 80727990 T 7 0 2 48 3 0 2 36 8 0 2 51 6 0 2 45 8 0 2 51 11 0 2 60 N 22 7.078 0 +Contig134_chr4_12145648_12148225 1326 C T 164.0 chr4 12146961 C 9 0 2 54 8 0 2 51 7 0 2 48 3 0 2 36 5 0 2 42 5 0 2 42 Y 4 0.080 1 +Contig19_chr4_26233601_26233991 146 G C 51.6 chr4 26233744 G 10 0 2 57 8 0 2 51 9 0 2 54 5 0 2 42 9 0 2 54 4 0 2 39 N 41 0.163 3 +Contig17_chr4_61310346_61311158 267 C T 49.9 chr4 61310604 T 10 0 2 57 7 0 2 48 9 0 2 54 10 0 2 57 14 0 2 69 7 0 2 48 Y 219 0.098 0 +Contig31_chr5_4734956_4736547 1166 C T 133.0 chr5 4736132 C 14 0 2 69 8 0 2 51 17 0 2 78 4 0 2 39 9 0 2 54 12 0 2 63 Y 1 0.021 0 +Contig6_chr5_26899813_26900498 97 A C 88.6 chr5 26899910 A 15 0 2 72 14 0 2 69 27 0 2 108 15 0 2 72 13 0 2 69 12 0 2 63 Y 92 7.370 3 +Contig45_chr5_50892738_50892968 169 C A 25.8 chr5 50892911 C 10 0 2 57 7 0 2 48 10 0 2 60 6 0 2 45 6 0 2 45 13 0 2 66 N 244 0.497 1 +Contig45_chr5_76133561_76134403 388 A G 103.0 chr5 76133941 G 3 0 2 36 8 0 2 51 8 0 2 51 5 0 2 42 6 0 2 45 7 0 2 48 Y 57 0.038 0 +Contig111_chr6_5821219_5822519 1060 A G 68.1 chr6 5822321 T 7 0 2 48 6 0 2 45 11 0 2 60 9 0 2 54 3 0 2 36 12 0 2 63 Y 7 0.231 1 +Contig102_chr6_30271329_30271577 39 T G 139.0 chr6 30271371 G 3 0 2 36 4 0 2 39 6 0 2 45 1 0 2 30 4 0 2 39 4 0 2 39 N 15 1.159 0 +Contig112_chr6_51024554_51024851 100 A G 121.0 chr6 51024654 A 10 0 2 57 12 0 2 63 9 0 2 54 13 0 2 66 14 0 2 69 17 0 2 78 N 75 4.287 0 +Contig84_chr7_6648683_6650255 1297 G A 110.0 chr7 6649988 G 18 0 2 81 9 0 2 54 22 0 2 77 16 0 2 75 20 0 2 87 6 0 2 45 Y 83 0.166 0 +Contig206_chr7_26281823_26282074 103 C A 101.0 chr7 26281925 T 11 0 2 60 16 0 2 61 19 0 2 84 6 0 2 45 19 0 2 84 16 0 2 75 N -1 0.947 1 +Contig38_chr7_50681997_50682600 42 T C 92.4 chr7 50682037 G 6 0 2 45 2 0 2 33 10 0 2 57 12 0 2 63 5 0 2 42 6 0 2 45 Y 94 0.146 0 
+Contig91_chr8_12804505_12805470 409 C A 111.0 chr8 12804906 C 8 0 2 51 10 0 2 57 15 0 2 72 12 0 2 63 14 0 2 69 15 0 2 72 N 145 0.175 0 +Contig8_chr8_27811135_27812620 333 C T 37.9 chr8 27811458 C 4 0 2 39 11 0 2 60 18 0 2 81 5 0 2 42 6 0 2 45 5 0 2 42 Y 1 0.272 0 +Contig17_chr8_57490059_57490498 69 G T 97.4 chr8 57490127 A 2 0 2 33 11 0 2 60 15 0 2 72 16 0 2 75 8 0 2 51 10 0 2 57 N 40 0.522 5 +Contig73_chr9_29451535_29452248 616 A G 24.7 chr9 29452127 G 4 0 2 39 7 0 2 48 1 0 2 30 4 0 2 39 7 0 2 48 6 0 2 45 N 49 0.448 4 +Contig96_chr9_39008495_39009278 215 A C 98.7 chr9 39008708 C 7 0 2 48 13 0 2 66 28 0 2 111 16 0 2 75 17 0 2 78 17 0 2 78 Y 8 0.427 1 +Contig22_chr10_15505382_15505589 172 T C 38.5 chr10 15505548 T 2 0 2 33 6 0 2 45 8 0 2 51 8 0 2 51 9 0 2 54 12 0 2 63 N 284 2.861 0 +Contig69_chr10_40547265_40548153 371 G A 58.1 chr10 40547649 A 9 0 2 54 8 0 2 51 8 0 2 51 9 0 2 54 4 0 2 39 5 0 2 42 Y 20 0.138 4 +Contig9_chr10_51475063_51476054 770 C T 57.3 chr10 51475839 C 6 0 2 45 16 0 2 75 16 0 2 75 13 0 2 66 9 0 2 54 9 2 2 21 N 80 0.394 0 +Contig72_chr11_7142765_7143772 146 G A 152.0 chr11 7142911 A 8 0 2 51 8 0 2 51 24 0 2 99 10 0 2 57 17 0 2 78 11 0 2 60 Y 90 1.137 0 +Contig7_chr11_40017076_40017630 352 C T 46.3 chr11 40017422 C 7 0 2 48 9 0 2 54 6 0 2 45 8 0 2 51 16 0 2 75 9 0 2 54 Y 44 0.336 0 +Contig16_chr11_53408448_53408790 187 A G 153.0 chr11 53408638 A 7 0 2 48 9 0 2 54 18 0 2 81 10 0 2 57 11 0 2 60 12 0 2 63 Y 116 1.367 0 +Contig21_chr12_18403415_18404381 586 G T 34.5 chr12 18403983 - 13 0 2 66 16 0 2 75 25 0 2 102 12 0 2 63 12 0 2 63 14 0 2 69 Y 12 0.068 0 +Contig41_chr12_25565452_25566993 475 G T 6.29 chr12 25565926 G 15 0 2 72 14 0 2 69 10 0 2 57 15 0 2 72 18 0 2 81 19 0 2 84 N 10 2.231 1 +Contig5_chr12_53880670_53882675 1221 A C 99.4 chr12 53881888 A 16 0 2 75 18 0 2 81 23 0 2 96 10 0 2 57 15 0 2 72 17 0 2 78 Y 31 0.061 0 +Contig107_chr13_26045881_26046290 341 C G 81.4 chr13 26046230 C 16 0 2 75 20 0 2 90 14 0 2 69 15 0 2 72 9 0 2 54 9 0 2 54 Y 51 
4.510 0 +Contig251_chr13_28498333_28501066 864 T G 296.0 chr13 28499180 T 3 0 2 36 5 0 2 42 4 0 2 39 2 0 2 33 5 0 2 42 6 0 2 45 Y 9 0.068 0 +Contig55_chr13_53467708_53468101 221 T G 132.0 chr13 53467925 T 25 0 2 102 12 0 2 63 26 0 2 105 7 0 2 48 16 0 2 75 16 0 2 75 N 20 5.717 1 +Contig48_chr14_11839435_11843272 3014 A G 163.0 chr14 11842446 A 10 0 2 57 8 0 2 51 13 0 2 66 10 0 2 57 5 0 2 42 10 0 2 57 Y 31 0.908 0 +Contig28_chr14_26905747_26909514 975 G C 3.13 chr14 26906723 G 16 0 2 75 10 0 2 57 12 0 2 63 15 0 2 72 10 0 2 57 7 0 2 48 N 287 0.117 2 +Contig64_chr14_56768376_56768902 473 C T 29.0 chr14 56768832 C 15 0 2 72 11 0 2 60 14 0 2 69 14 0 2 69 7 0 2 48 9 0 2 54 Y 91 8.281 0 +Contig60_chr15_18493036_18494316 150 G A 92.6 chr15 18493188 G 9 0 2 54 13 0 2 66 9 0 2 54 6 0 2 45 5 0 2 42 12 0 2 63 Y 45 0.125 0 +Contig112_chr15_26772864_26773267 374 C T 21.6 chr15 26773244 C 4 0 2 39 4 0 2 39 5 0 2 42 2 0 2 33 4 0 2 39 3 0 2 36 N 18 +99. 0 +Contig119_chr16_6160274_6160477 180 G A 54.8 chr16 6160457 G 7 0 2 48 6 0 2 45 12 0 2 63 3 0 2 36 11 0 2 60 10 0 2 57 N 42 +99. 
0 +Contig60_chr16_28079136_28080263 588 T G 157.0 chr16 28079739 T 22 0 2 93 20 0 2 87 22 0 2 93 17 0 2 78 12 0 2 63 10 0 2 57 Y 105 5.999 1 +Contig31_chr17_12128267_12129637 205 G A 90.5 chr17 12128484 G 7 0 2 48 6 0 2 45 6 0 2 45 11 0 2 60 7 0 2 48 4 0 2 39 Y 10 0.246 0 +Contig99_chr17_26021506_26022200 505 C T 88.8 chr17 26022017 T 15 0 2 72 13 0 2 66 19 0 2 84 9 0 2 54 10 0 2 57 11 0 2 60 Y 1 0.172 1 +Contig27_chr17_61713766_61716585 1056 G C 40.0 chr17 61714821 G 4 0 2 39 8 0 2 51 10 0 2 57 6 0 2 45 6 0 2 45 3 0 2 36 N 6 2.200 4 +Contig229_chr18_3706523_3708577 1076 A G 83.9 chr18 3707630 A 11 0 2 60 13 0 2 66 26 0 2 105 11 0 2 60 15 0 2 72 17 0 2 78 Y 63 0.445 0 +Contig82_chr18_27305489_27306229 566 C T 49.5 chr18 27306051 A 6 0 2 45 6 0 2 45 10 0 2 57 11 0 2 60 6 0 2 45 7 0 2 48 N 1 0.349 0 +Contig64_chr18_55979770_55980315 49 G A 89.1 chr18 55979824 G 3 0 2 36 9 0 2 54 7 0 2 51 4 0 2 39 3 0 2 36 3 0 2 36 Y -1 2.124 0 +Contig146_chr19_5221790_5223013 143 A G 114.0 chr19 5221916 - 1 0 2 30 4 0 2 39 3 0 2 36 5 0 2 42 2 0 2 33 5 0 2 42 Y 12 0.870 0 +Contig129_chr19_25541958_25542221 202 T C 68.1 chr19 25542154 C 11 0 2 60 19 0 2 84 10 0 2 60 17 0 2 78 9 0 2 54 12 0 2 63 N -1 2.551 1 +Contig60_chr19_54013816_54014398 281 A G 138.0 chr19 54014103 C 6 0 2 45 15 0 2 72 7 0 2 48 10 0 2 57 15 0 2 72 10 0 2 57 Y 188 1.271 0 +Contig50_chr20_12138509_12141975 3206 C A 248.0 chr20 12141763 C 8 0 2 51 15 0 2 72 14 0 2 69 6 0 2 45 10 0 2 57 7 0 2 48 Y 2 0.384 0 +Contig36_chr20_32631363_32632049 176 G A 24.1 chr20 32631526 G 7 0 2 48 14 0 2 69 19 0 2 84 14 0 2 69 15 0 2 72 16 0 2 75 N 50 1.150 0 +Contig50_chr21_4178523_4178687 121 G A 362.0 chr21 4178640 G 8 0 2 51 14 0 2 69 5 0 2 42 3 0 2 36 11 0 2 60 4 0 2 39 N 392 0.483 0 +Contig129_chr21_31045749_31046924 381 A G 129.0 chr21 31046141 A 19 0 2 84 8 0 2 51 23 0 2 96 12 0 2 63 15 0 2 72 18 0 2 81 Y 69 0.028 2 +Contig159_chr22_7896450_7896974 109 G C 151.0 chr22 7896570 G 16 0 2 75 5 7 1 62 14 0 2 69 16 0 2 75 13 0 2 66 13 
0 2 66 Y 16 0.465 0 +Contig23_chr22_34612023_34612568 167 C G 92.3 chr22 34612181 C 11 0 2 60 18 0 2 81 13 0 2 66 8 0 2 51 12 0 2 63 14 0 2 69 Y 7 0.409 0 +Contig26_chr22_57817664_57819633 1453 A G 150.0 chr22 57819121 G 9 0 2 54 9 0 2 54 13 0 2 66 15 0 2 72 11 0 2 60 14 0 2 69 N 15 0.471 1 +Contig133_chr23_3525134_3526502 1223 A G 201.0 chr23 3526387 A 11 0 2 60 13 0 2 66 23 0 2 96 21 0 2 90 13 0 2 66 10 0 2 57 Y 61 1.359 0 +Contig35_chr23_28447813_28449115 70 T A 21.3 chr23 28447881 T 9 0 2 54 8 0 2 51 10 0 2 57 9 0 2 54 10 0 2 57 12 0 2 63 N 251 0.163 1 +Contig50_chr24_22515247_22516072 761 C T 243.0 chr24 22515981 T 11 0 2 60 10 0 2 57 8 0 2 51 9 0 2 54 18 0 2 81 8 0 2 51 Y 1 0.190 0 +Contig84_chr24_29196623_29199644 466 C T 126.0 chr24 29197091 T 7 0 2 48 11 0 2 60 8 0 2 51 7 0 2 48 11 0 2 60 15 0 2 72 Y 42 0.215 0 +Contig144_chr25_4011170_4013134 541 A G 160.0 chr25 4011690 A 12 0 2 63 17 0 2 78 13 0 2 66 13 0 2 66 13 0 2 66 13 0 2 66 Y 5 0.087 0 +Contig103_chr25_38891221_38892140 407 G A 131.0 chr25 38891644 G 8 0 2 51 14 0 2 69 18 0 2 81 8 0 2 51 8 0 2 51 11 0 2 60 Y 149 0.167 4 +Contig204_chr26_4311195_4311778 170 C T 16.9 chr26 4311363 T 20 0 2 87 8 0 2 51 13 0 2 66 18 0 2 81 11 0 2 60 14 0 2 69 N 35 0.085 0 +Contig146_chr26_26622638_26623906 574 G A 186.0 chr26 26623219 A 11 0 2 60 12 0 2 63 9 0 2 54 11 0 2 60 9 0 2 54 12 0 2 63 Y 1 0.318 0 +Contig135_chr27_6853874_6854079 158 C T 116.0 chr27 6854032 T 18 0 2 81 19 0 2 84 13 0 2 66 7 0 2 48 8 0 2 51 11 0 2 60 N 4 0.060 1 +Contig64_chr27_34654435_34654621 132 C A 115.0 chr27 34654567 T 2 0 2 33 2 0 2 33 5 0 2 42 3 0 2 36 3 0 2 36 8 0 2 51 N 12 0.297 1 +Contig131_chr28_6481806_6483783 138 C T 36.2 chr28 6481953 C 12 0 2 63 12 0 2 63 20 0 2 87 11 0 2 60 10 0 2 57 12 0 2 63 Y 10 0.387 0 +Contig60_chr28_30197166_30197364 92 T C 164.0 chr28 30197258 T 10 0 2 57 13 0 2 66 15 0 2 72 16 0 2 75 12 0 2 63 11 0 2 60 N 369 1.139 0 +Contig29_chr29_4726399_4727143 559 A T 163.0 chr29 4726955 A 15 0 2 72 18 0 2 81 18 0 
2 81 16 0 2 75 11 0 2 60 14 0 2 72 Y 161 3.114 0 +Contig1_chr30_5992217_5993068 106 C T 129.0 chr30 5992319 C 10 0 2 57 11 0 2 60 7 0 2 48 11 0 2 60 10 0 2 57 12 0 2 63 Y 76 1.079 0 +Contig165_chr30_25804389_25804926 190 T C 126.0 chr30 25804592 C 3 0 2 36 8 0 2 51 7 0 2 48 10 0 2 57 7 0 2 48 4 0 2 39 Y 113 0.329 0 +Contig38_chr31_5164423_5166573 2074 C T 134.0 chr31 5166501 T 13 0 2 66 10 0 2 57 17 0 2 78 11 0 2 60 17 0 2 78 10 0 2 57 Y 58 +99. 0 +Contig17_chr31_26433828_26434459 498 T C 9.79 chr31 26434322 T 18 0 2 81 10 0 2 57 15 0 2 72 13 0 2 66 16 0 2 75 15 0 2 72 Y 137 4.814 0 +Contig9_chr32_19479532_19479735 12 A G 20.7 chr32 19479544 A 1 0 2 30 2 0 2 33 1 0 2 30 5 0 2 42 3 0 2 36 3 0 2 36 N 17 +99. 0 +Contig30_chr32_25902721_25905783 208 C G 162.0 chr32 25902927 G 11 0 2 60 13 0 2 66 11 0 2 60 12 0 2 63 7 0 2 48 11 0 2 60 Y 145 0.322 2 +Contig18_chr33_22207246_22209159 1363 G T 51.5 chr33 22208619 - 16 0 2 75 8 0 2 51 11 0 2 60 10 0 2 57 15 0 2 72 12 0 2 63 Y 59 2.560 0 +Contig170_chr33_26189421_26189940 292 T C 98.4 chr33 26189703 T 21 0 2 90 13 0 2 66 15 0 2 72 13 0 2 66 19 0 2 84 13 0 2 66 Y 23 0.307 0 +Contig113_chr34_13341080_13341643 236 C T 90.7 chr34 13341316 C 4 0 2 39 2 0 2 33 8 0 2 51 4 0 2 39 8 0 2 51 3 0 2 36 Y 47 0.412 3 +Contig152_chr34_31794848_31795540 242 G A 93.2 chr34 31795093 G 11 0 2 60 24 0 2 99 17 0 2 78 15 0 2 72 18 0 2 81 17 0 2 78 Y 123 2.780 0 +Contig47_chr35_3666773_3667898 348 G T 124.0 chr35 3667121 G 9 0 2 54 20 0 2 87 18 0 2 81 15 0 2 72 12 0 2 63 14 0 2 69 Y 285 0.235 0 +Contig74_chr35_25394343_25394813 303 A T 221.0 chr35 25394646 G 23 0 2 96 15 0 2 72 25 0 2 105 7 7 1 49 18 0 2 81 16 0 2 75 Y 58 4.298 0 +Contig5_chr36_4562983_4563634 343 C T 151.0 chr36 4563324 T 20 0 2 87 20 0 2 87 23 0 2 96 24 0 2 99 9 0 2 54 8 0 2 51 Y 40 1.169 0 +Contig133_chr36_32954045_32955409 136 A G 116.0 chr36 32954182 A 16 0 2 75 15 0 2 72 20 0 2 87 11 0 2 60 18 0 2 81 13 0 2 66 Y 74 3.772 1 +Contig53_chr37_6665763_6665919 116 C T 111.0 chr37 
6665875 C 9 0 2 54 9 0 2 54 5 0 2 42 9 0 2 54 8 0 2 51 10 0 2 57 N 15 10.875 1 +Contig2_chr37_31197993_31198256 182 C T 39.6 chr37 31198171 T 6 0 2 45 10 0 2 57 7 0 2 48 9 0 2 54 10 0 2 57 12 0 2 63 N 2 0.595 0 +Contig7_chr38_12217200_12218387 1163 A T 44.4 chr38 12218353 A 11 0 2 60 13 0 2 66 17 0 2 78 10 0 2 57 11 0 2 60 11 0 2 60 Y 67 +99. 0 +Contig265_chrX_2689247_2689484 114 C G 103.0 chrX 2689356 C 11 0 2 60 9 0 2 54 13 0 2 66 16 0 2 75 14 0 2 69 10 0 2 57 N 2 9.232 1 +Contig113_chrX_26287829_26288398 385 C T 59.6 chrX 26288213 C 9 0 2 54 9 0 2 54 17 0 2 78 11 0 2 60 3 8 1 44 4 0 2 39 N 13 0.077 0 +Contig90_chrX_57430715_57431566 548 C T 116.0 chrX 57431266 T 9 0 2 54 18 0 2 81 13 0 2 66 14 0 2 69 8 0 2 54 7 0 2 48 Y 261 0.154 1 +Contig133_chrX_84833782_84834125 182 G A 69.7 chrX 84833962 G 5 0 2 42 18 0 2 81 12 0 2 63 19 0 2 84 6 3 1 27 7 0 2 48 N 619 0.278 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/specify_restriction_enzymes/specify_restriction_enzymes.wsf Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,10 @@ +#{"column_names":["scaf","pos","A","B","qual","ref","rpos","rnuc","1A","1B","1G","1Q","2A","2B","2G","2Q","3A","3B","3G","3Q","4A","4B","4G","4Q","5A","5B","5G","5Q","6A","6B","6G","6Q","pair","dist", +#"prim","rflp"],"dbkey":"canFam2","individuals":[["PB1",9],["PB2",13],["PB3",17],["PB4",21],["PB6",25],["PB8",29]],"pos":2,"rPos":7,"ref":6,"scaffold":1,"species":"bear"} +Contig47_chr2_25470778_25471576 126 G A 888.0 chr2 25470896 G 12 0 2 63 14 0 2 69 14 0 2 69 10 0 2 57 18 0 2 81 13 0 2 66 N 11 0.289 1 +Contig73_chr9_29451535_29452248 616 A G 24.7 chr9 29452127 G 4 0 2 39 7 0 2 48 1 0 2 30 4 0 2 39 7 0 2 48 6 0 2 45 N 49 0.448 4 +Contig69_chr10_40547265_40548153 371 G A 58.1 chr10 40547649 A 9 0 2 54 8 0 2 51 8 0 2 51 9 0 2 54 4 0 2 39 5 0 2 42 Y 20 0.138 4 +Contig99_chr17_26021506_26022200 505 C T 88.8 chr17 26022017 T 15 0 2 72 13 0 2 66 19 0 2 84 9 0 2 54 10 0 2 57 11 0 2 60 Y 1 0.172 1 +Contig27_chr17_61713766_61716585 1056 G C 40.0 chr17 61714821 G 4 0 2 39 8 0 2 51 10 0 2 57 6 0 2 45 6 0 2 45 3 0 2 36 N 6 2.200 4 +Contig26_chr22_57817664_57819633 1453 A G 150.0 chr22 57819121 G 9 0 2 54 9 0 2 54 13 0 2 66 15 0 2 72 11 0 2 60 14 0 2 69 N 15 0.471 1 +Contig103_chr25_38891221_38892140 407 G A 131.0 chr25 38891644 G 8 0 2 51 14 0 2 69 18 0 2 81 8 0 2 51 8 0 2 51 11 0 2 60 Y 149 0.167 4 +Contig64_chr27_34654435_34654621 132 C A 115.0 chr27 34654567 T 2 0 2 33 2 0 2 33 5 0 2 42 3 0 2 36 3 0 2 36 8 0 2 51 N 12 0.297 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/gd.oscar.loc.sample Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,4 @@ +#<species> <data_file> +#hg19 /galaxy/local_data/genome_diversity/oscar/hsa_ENSEMBLcKEGGctpthw.tsv +#bosTau4 /galaxy/local_data/genome_diversity/oscar/bta_ENSEMBLcKEGGctpthw.tsv +#canFam2 /galaxy/local_data/genome_diversity/oscar/cfa_ENSEMBLcKEGGctpthw.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/gd.primers.loc.sample Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,6 @@ +#<species> <primers_file_path> +#aye-aye /galaxy/local_data/genome_diversity/primers/aye-aye_Galaxy_primers.txt +#bear /galaxy/local_data/genome_diversity/primers/bear_Galaxy_primers.txt +#bighorn /galaxy/local_data/genome_diversity/primers/bighorn_Galaxy_primers.txt +#tasmanian_devil /galaxy/local_data/genome_diversity/primers/devil_Galaxy_primers.txt +#tick /galaxy/local_data/genome_diversity/primers/tick_Galaxy_primers.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/gd.rank.loc.sample Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,4 @@ +#<species> <prefix> <kxml_dir_path> <path_to_dict_file> +#hg19 hsa /galaxy/local_data/genome_diversity/rank/KXML_hsa.d /galaxy/local_data/genome_diversity/rank/hsa_dict.txt +#canFam2 cfa /galaxy/local_data/genome_diversity/rank/KXML_cfa.d /galaxy/local_data/genome_diversity/rank/cfa_dict.txt +#bosTau4 bta /galaxy/local_data/genome_diversity/rank/KXML_bta.d /galaxy/local_data/genome_diversity/rank/bta_dict.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/gd.snps.loc.sample Thu Apr 05 15:28:27 2012 -0400 @@ -0,0 +1,3 @@ +#<species> <SNP_call_file_path> +#bighorn /galaxy/local_data/genome_diversity/snps/bighorn_snps.txt +#tasmanian_devil /galaxy/local_data/genome_diversity/snps/devil_snps.txt