Mercurial > repos > dereeper > uniqprimer
comparison uniqprimer-0.5.0/primertools/nucmerparser.py @ 0:cdd8f911ad91 draft
Uploaded
| author | dereeper |
|---|---|
| date | Fri, 07 Oct 2016 04:18:11 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:cdd8f911ad91 |
|---|---|
| 1 ''' | |
| 2 Created on Jan 1, 2011 | |
| 3 | |
| 4 @author: John L. Herndon | |
| 5 @contact: herndon@cs.colostate.edu | |
| 6 @organization: Colorado State University | |
| 7 @group: Computer Science Department, Asa Ben-Hur's laboratory | |
| 8 ''' | |
| 9 | |
| 10 import utils | |
| 11 import os | |
| 12 import re | |
| 13 | |
def parseCoordMatchLine( match ):
    '''
    Turn a single match row of a coord file into a utils.Match.

    The row is cut on '|' into at most five sections. The first section
    supplies the start and end locations; the first token of the last
    section supplies the sequence ID.

    @param match: one text line describing a match
    @return: utils.Match built from ( start, end, seqid )
    @raise ValueError: if a start/end token is not an integer
    @raise IndexError: if the first section has fewer than two tokens
    '''
    #tabs count as plain column separators, so fold them into spaces
    normalized = match.replace( '\t', ' ' )

    #NOTE(review): the split limit of 4 presumably keeps any '|' inside
    #the trailing ID section intact - confirm against real input
    columns = normalized.split( '|', 4 )

    #trailing section: its leading token is the sequenceID
    idTokens = re.split( " +", columns[ -1 ].strip( ) )
    seqid = idTokens[ 0 ].strip( )

    #leading section: start and end locations of the match
    coordTokens = re.split( ' +', columns[ 0 ].strip( ) )
    start = int( coordTokens[ 0 ].strip( ) )
    end = int( coordTokens[ 1 ].strip( ) )

    return utils.Match( start, end, seqid )
| 33 | |
def parseCoordMatchFile( coordFileName ):
    '''
    A method to parse the coord file.

    Everything up to and including the first line that begins with '='
    is treated as header and skipped. Each remaining non-blank line is
    handed to parseCoordMatchLine.

    @param coordFileName: path of the coord file to read
    @return: a list of utils.Match objects, one per match line, in file order
    @raise utils.NoFileFoundException: if coordFileName does not exist
    @raise IndexError: if no line of the file starts with '='
    '''
    #throw if the file doesn't exist
    if not os.path.exists( coordFileName ):
        raise utils.NoFileFoundException( coordFileName )

    #read the nucmer file into memory; the with-block closes the handle
    #even when reading fails
    with open( coordFileName ) as coordFile:
        lines = coordFile.readlines( )

    #skip forward to the start of the matches.
    separatorIndex = None
    for index, line in enumerate( lines ):
        if line.startswith( '=' ):
            separatorIndex = index
            break

    if separatorIndex is None:
        #running off the end of the list used to raise a bare IndexError;
        #keep the exception type but say what went wrong
        raise IndexError( "no '=' separator line found in coord file {0}".format( coordFileName ) )

    #parse each line for match start, end and sequenceID. Blank lines
    #(e.g. a trailing newline at the end of the file) carry no match and
    #would fail integer parsing, so they are skipped.
    returnValue = [ parseCoordMatchLine( matchLine )
                    for matchLine in lines[ separatorIndex + 1 : ]
                    if matchLine.strip( ) ]

    utils.logMessage( "NucmerParser::parseCoordMatchFile( )", "Parse {0}, finding {1} matches".format( coordFileName, len( returnValue ) ) )

    return returnValue
| 62 | |
| 63 | |
| 64 | |
| 65 | |
| 66 | |
| 67 | |
| 68 |
