annotate uniqprimer-0.5.0/primertools/includefilemanager.py @ 2:05ae1ce478bc draft default tip

Uploaded
author dereeper
date Fri, 08 Jun 2018 10:49:07 -0400
parents cdd8f911ad91
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
1 '''
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
2 Created on Jan 1, 2011
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
3
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
4 @author: John L. Herndon
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
5 @contact: herndon@cs.colostate.edu
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
6 @organization: Colorado State University
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
7 @group: Computer Science Department, Asa Ben-Hur's laboratory
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
8 '''
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
9
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
10
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
11 import fastaparser
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
12 import utils
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
13 import os
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
14 import programs
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
15 import nucmerparser
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
16 import copy
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
17
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
class IncludeFileManager(object):
    """
    A class to manage include files.

    The first include file handed to processIncludeFile( ) becomes the
    reference file: it is aligned against the exclude file with nucmer and
    the sub-sequences that did not match are kept as the unique sequences.
    Every later include file is aligned against those unique sequences.
    """
    #This class needs some work. Need to come up with a way to find unique sequences between all include files....

    def __init__(self):
        """
        Constructor
        """
        self.includeFiles = []
        self.nucmer = programs.Nucmer()
        self.isExcludeFileInitialized = False
        self.isReferenceFileInitialized = False
        # Defined up front so the attribute always exists, even before
        # setExcludeFile( ) has been called.
        self.excludeFileName = None
        self.referenceFile = None
        self.referenceSequence = None
        self.uniqueSequences = None

    def setExcludeFile(self, excludeFileName):
        """
        A function to set the exclude file that will be used when nucmer is called
        """
        utils.logMessage("IncludeFileManager::setExcludeFile( )", "fileName {0}".format(excludeFileName))
        self.excludeFileName = excludeFileName
        self.isExcludeFileInitialized = True

    def _alignAndAnnotate(self, queryFile, otherFile, warnOnUnknownId):
        """
        Run nucmer on ( queryFile, otherFile ), parse the resulting matches and
        attach each match to the sequence of queryFile that carries the same ID.

        queryFile - fasta file whose sequences are parsed and annotated
        otherFile - fasta file that queryFile is aligned against
        warnOnUnknownId - when True, print and log a warning for every match
                          whose seqID is absent from the parsed sequences;
                          when False such matches are skipped silently

        Returns the container produced by
        fastaparser.parseFastaFileAsPrimerSequence( ) (indexed by sequence ID).
        """
        coordFile = self.nucmer.execute([queryFile, otherFile])

        matches = nucmerparser.parseCoordMatchFile(coordFile)
        sequences = fastaparser.parseFastaFileAsPrimerSequence(queryFile)

        for match in matches:
            # "in" replaces dict.has_key( ), which no longer exists in Python 3
            if match.seqID in sequences:
                sequences[match.seqID].addMatch(match)
            elif warnOnUnknownId:
                print("Warning: id from .coords file not found in sequence data...")
                utils.logMessage("IncludeFileManager::findUniqueSequencesInFile( )", "WARNING - an ID was read in a Match that does not correspond to a sequence read from the fasta file!")

        return sequences

    def findUniqueSequencesInFile(self, doWantFile, doNotWantFile):
        """
        Align doWantFile against doNotWantFile with nucmer and return a list
        holding, for every sequence of doWantFile, the sub-sequences reported
        by getNonMatchedSubSequences( ).
        """
        utils.logMessage("IncludeFileManager::findUniqueSequencesInFile( )", "running nucmer for reference file: {0}".format(doWantFile))
        sequences = self._alignAndAnnotate(doWantFile, doNotWantFile, True)

        returnValue = []
        for key in sequences:
            returnValue.extend(sequences[key].getNonMatchedSubSequences())

        return returnValue

    def findCommonSequencesInFile(self, want, alsoWant):
        """
        Align want against alsoWant with nucmer and return a list holding, for
        every sequence of want, the sub-sequences reported by
        getMatchedSubSequences( ).
        """
        utils.logMessage("IncludeFileManager::findCommonSequencesInFile( )", "running nucmer for reference file: {0}".format(want))

        # Same console output as the former "print want, alsoWant" statement,
        # written so that it also parses under Python 3.
        print("{0} {1}".format(want, alsoWant))
        sequences = self._alignAndAnnotate(want, alsoWant, False)

        returnValue = []
        for key in sequences:
            returnValue.extend(sequences[key].getMatchedSubSequences())

        return returnValue

    def processIncludeFile(self, includeFileName):
        """
        A function that adds and processes an include file.
        An exclude file must be set for this function to be called.

        Raises utils.ModuleNotInitializedException when no exclude file has
        been set through setExcludeFile( ).
        """
        utils.logMessage("IncludeFileManager::processIncludeFile( )", "processing {0}".format(includeFileName))

        if not self.isExcludeFileInitialized:
            utils.logMessage("IncludeFileManager::processIncludeFile( )", "no exclude file set")
            raise utils.ModuleNotInitializedException("includefilemanager", "no exclude file set")

        if not self.isReferenceFileInitialized:
            # The first include file acts as the reference: keep whatever in it
            # does not match the exclude file.
            utils.logMessage("IncludeFileManager::processIncludeFile( )", "running nucmer for reference file: {0}".format(includeFileName))
            self.uniqueSequences = self.findUniqueSequencesInFile(includeFileName, self.excludeFileName)

            self.referenceFile = includeFileName
            self.isReferenceFileInitialized = True

        else:
            #write the unique sequences to a temp file
            tempSequences = os.path.join(utils.getTemporaryDirectory(), "tempSequences.fasta")
            fastaparser.writeFastaFile(self.uniqueSequences, tempSequences)
            # NOTE(review): the common sequences returned here are discarded, so
            # self.uniqueSequences is not narrowed by later include files.
            # Left unchanged on purpose - confirm the intended behaviour.
            self.findCommonSequencesInFile(includeFileName, tempSequences)
            # NOTE(review): assumed to belong to this branch only (the reference
            # file is tracked separately in self.referenceFile) - confirm.
            self.includeFiles.append(includeFileName)

    def getUniqueSequences(self):
        """
        getUniqueSequences - return the sequences computed by processIncludeFile( ):
        the list of sub-sequences of the reference include file that did not
        match the exclude file, or None when no include file has been processed yet.
        """
        return self.uniqueSequences
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
130
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
131
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
132