annotate uniqprimer-0.5.0/primertools/nucmerparser.py @ 2:05ae1ce478bc draft default tip

Uploaded
author dereeper
date Fri, 08 Jun 2018 10:49:07 -0400
parents cdd8f911ad91
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
1 '''
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
2 Created on Jan 1, 2011
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
3
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
4 @author: John L. Herndon
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
5 @contact: herndon@cs.colostate.edu
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
6 @organization: Colorado State University
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
7 @group: Computer Science Department, Asa Ben-Hur's laboratory
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
8 '''
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
9
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
10 import utils
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
11 import os
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
12 import re
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
13
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
def parseCoordMatchLine( match ):
    '''
    Parse one match line of a nucmer coord file.

    The line is treated as '|'-delimited sections: the first section
    holds the start and end locations of the match, and the last
    section begins with the sequence ID.

    returns a utils.Match built from ( start, end, seqid )
    '''
    #tabs are normalized to spaces so a single separator has to be handled
    columns = match.replace( '\t', ' ' ).split( '|', 4 )

    #first section: the start and end locations of the match
    coordTokens = re.split( ' +', columns[ 0 ].strip( ) )
    start, end = int( coordTokens[ 0 ] ), int( coordTokens[ 1 ] )

    #last section: the sequenceID is everything up to the first space
    idColumn = columns[ -1 ].strip( )
    seqid = idColumn.partition( ' ' )[ 0 ].strip( )

    return utils.Match( start, end, seqid )
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
33
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
def parseCoordMatchFile( coordFileName ):
    '''
    A method to parse the coord file.

    Everything up to and including the first line that starts with '='
    is treated as header and skipped. Each remaining non-blank line is
    handed to parseCoordMatchLine.

    coordFileName: path of the coord file to read

    returns a list of utils.match objects

    raises utils.NoFileFoundException if coordFileName does not exist
    raises IndexError if the file contains no line starting with '='
    '''
    #throw if the file doesn't exist
    if not os.path.exists( coordFileName ):
        raise utils.NoFileFoundException( coordFileName )

    #read the nucmer file into memory; the with-block guarantees the
    #file handle is closed even if reading fails
    with open( coordFileName ) as coordFile:
        lines = coordFile.readlines( )

    #skip forward to the start of the matches.
    headerEnd = None
    for index, line in enumerate( lines ):
        if line.startswith( '=' ):
            headerEnd = index
            break

    #an empty or truncated file has no separator line; fail with a
    #message that names the file rather than a bare out-of-range error
    if headerEnd is None:
        raise IndexError( "no '=' header separator line found in {0}".format( coordFileName ) )

    #parse each line for match start, end and sequenceID. Blank lines
    #( e.g. a trailing empty line ) carry no match and are skipped.
    returnValue = [ parseCoordMatchLine( matchLine )
                    for matchLine in lines[ headerEnd + 1 : ]
                    if matchLine.strip( ) ]

    utils.logMessage( "NucmerParser::parseCoordMatchFile( )", "Parse {0}, finding {1} matches".format( coordFileName, len( returnValue ) ) )

    return returnValue
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
62
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
63
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
64
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
65
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
66
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
67
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
68