annotate MicroPITA.py @ 19:656e80be827a draft

Updated the version in the main micropita.xml
author george-weingart
date Thu, 11 Aug 2016 01:00:39 -0400
parents 7d25ecd225dd
children 1d09ffab87a7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1 #!/usr/bin/env python
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
2 """
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
3 Author: Timothy Tickle
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
4 Description: Class to Run analysis for the microPITA paper
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
5 """
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
6
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
7 #####################################################################################
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
8 #Copyright (C) <2012>
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
9 #
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
10 #Permission is hereby granted, free of charge, to any person obtaining a copy of
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
11 #this software and associated documentation files (the "Software"), to deal in the
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
12 #Software without restriction, including without limitation the rights to use, copy,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
13 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
14 #and to permit persons to whom the Software is furnished to do so, subject to
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
15 #the following conditions:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
16 #
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
17 #The above copyright notice and this permission notice shall be included in all copies
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
18 #or substantial portions of the Software.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
19 #
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
20 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
21 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
22 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
23 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
24 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
25 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
26 #####################################################################################
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
27
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
28 __author__ = "Timothy Tickle"
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
29 __copyright__ = "Copyright 2012"
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
30 __credits__ = ["Timothy Tickle"]
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
31 __license__ = "MIT"
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
32 __maintainer__ = "Timothy Tickle"
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
33 __email__ = "ttickle@sph.harvard.edu"
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
34 __status__ = "Development"
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
35
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
36 import sys
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
37 import argparse
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
38 from src.breadcrumbs.src.AbundanceTable import AbundanceTable
16
7d25ecd225dd Updated Micropita.py to suppres future warnings as this was causing a problem inn Galaxy
george.weingart@gmail.com
parents: 0
diff changeset
39 import warnings
7d25ecd225dd Updated Micropita.py to suppres future warnings as this was causing a problem inn Galaxy
george.weingart@gmail.com
parents: 0
diff changeset
40 warnings.simplefilter(action = "ignore", category = FutureWarning)
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
41 from src.breadcrumbs.src.ConstantsBreadCrumbs import ConstantsBreadCrumbs
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
42 from src.breadcrumbs.src.Metric import Metric
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
43 from src.breadcrumbs.src.KMedoids import Kmedoids
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
44 from src.breadcrumbs.src.MLPYDistanceAdaptor import MLPYDistanceAdaptor
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
45 from src.breadcrumbs.src.SVM import SVM
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
46 from src.breadcrumbs.src.UtilityMath import UtilityMath
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
47
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
48 from src.ConstantsMicropita import ConstantsMicropita
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
49 import csv
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
50 import logging
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
51 import math
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
52 import mlpy
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
53 import numpy as np
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
54 import operator
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
55 import os
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
56 import random
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
57 import scipy.cluster.hierarchy as hcluster
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
58 import scipy.spatial.distance
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
59 from types import *
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
60
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
61 class MicroPITA:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
62 """
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
63 Selects samples from a first tier of a multi-tiered study to be used in a second tier.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
64 Different methods can be used for selection.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
65 The expected input is an abundance table (and potentially a text file of targeted features,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
66 if using the targeted features option). Output is a list of samples exhibiting the
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
67 characteristics of interest.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
68 """
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
69
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
70 #Constants
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
71 #Diversity metrics Alpha
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
72 c_strInverseSimpsonDiversity = Metric.c_strInvSimpsonDiversity
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
73 c_strChao1Diversity = Metric.c_strChao1Diversity
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
74
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
75 #Diversity metrics Beta
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
76 c_strBrayCurtisDissimilarity = Metric.c_strBrayCurtisDissimilarity
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
77
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
78 #Additive inverses of diversity metrics beta
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
79 c_strInvBrayCurtisDissimilarity = Metric.c_strInvBrayCurtisDissimilarity
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
80
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
81 #Technique Names
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
82 ConstantsMicropita.c_strDiversity2 = ConstantsMicropita.c_strDiversity+"_C"
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
83
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
84 #Targeted feature settings
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
85 c_strTargetedRanked = ConstantsMicropita.c_strTargetedRanked
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
86 c_strTargetedAbundance = ConstantsMicropita.c_strTargetedAbundance
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
87
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
88 #Technique groupings
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
89 # c_lsDiversityMethods = [ConstantsMicropita.c_strDiversity,ConstantsMicropita.c_strDiversity2]
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
90
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
91 #Converts ecology metrics into standardized method selection names
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
92 dictConvertAMetricDiversity = {c_strInverseSimpsonDiversity:ConstantsMicropita.c_strDiversity, c_strChao1Diversity:ConstantsMicropita.c_strDiversity2}
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
93 # dictConvertMicroPITAToAMetric = {ConstantsMicropita.c_strDiversity:c_strInverseSimpsonDiversity, ConstantsMicropita.c_strDiversity2:c_strChao1Diversity}
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
94 dictConvertBMetricToMethod = {c_strBrayCurtisDissimilarity:ConstantsMicropita.c_strRepresentative}
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
95 dictConvertInvBMetricToMethod = {c_strBrayCurtisDissimilarity:ConstantsMicropita.c_strExtreme}
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
96
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
97 #Linkage used in the Hierarchical clustering
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
98 c_strHierarchicalClusterMethod = 'average'
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
99
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
100 ####Group 1## Diversity
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
101 #Testing: Happy path Testing (8)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
def funcGetTopRankedSamples(self, lldMatrix = None, lsSampleNames = None, iTopAmount = None):
    """
    For each list of measurements, selects the samples holding the highest values.
    Higher measurements are treated as better. Ties keep their original positional order.

    :param lldMatrix: List of lists [[value,value,value,value],[value,value,value,value]].
    :type: List of lists	Each inner list is a different measurement; each value is positionally related to a sample.
    :param lsSampleNames: List of sample names positionally related (the same) to each list (Optional).
    :type: List of strings	If given (and not empty) names are returned, otherwise indices are returned.
    :param iTopAmount: The amount of top measured samples to return per list (None returns all, ranked).
    :type: integer	Integer amount of sample names/ indices to return.
    :return List: One list per measurement holding the selected sample names (or indices), best first.
                  An empty list is returned if no matrix is given.
    """
    #Nothing to rank (None is the default value of the parameter)
    if lldMatrix is None:
        return []

    topRankListRet = []
    for rowMetrics in lldMatrix:
        #Rank the positions by their measurement, highest first.
        #sorted() is used instead of range(...).sort() so this also works when range is not a list (python 3)
        #and is a stable sort so equal measurements stay in their original order.
        liRankedIndices = sorted(range(len(rowMetrics)), key = rowMetrics.__getitem__, reverse = True)[:iTopAmount]

        if lsSampleNames:
            #Translate the positions to the sample names
            topRankListRet.append([lsSampleNames[iIndex] for iIndex in liRankedIndices])
        else:
            topRankListRet.append(liRankedIndices)

    return topRankListRet
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
128
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
129 ####Group 2## Representative Dissimilarity
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
130 #Testing: Happy path tested 1
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
def funcGetCentralSamplesByKMedoids(self, npaMatrix=None, sMetric=None, lsSampleNames=None, iNumberSamplesReturned=0, istmBetaMatrix=None, istrmTree=None, istrmEnvr=None):
    """
    Gets centroid samples by k-medoids clustering of a given matrix.

    :param npaMatrix: Numpy array where row=samples and columns=features (the row count is used as the sample count).
    :type: Numpy array	Abundance Data.
    :param sMetric: String name of beta metric used as the distance metric.
    :type: String	String name of beta metric.
    :param lsSampleNames: The names of the samples, in the order of the rows of npaMatrix.
    :type: List	List of strings
    :param iNumberSamplesReturned: Number of samples to return, each will be a medoid of a cluster.
    :type: Integer	Number of samples to return
    :param istmBetaMatrix: File with beta-diversity matrix. If given it is read instead of calculating the metric.
    :type: File stream or file path string
    :param istrmTree: Passed through to Metric.funcGetBetaMetric (presumably needed for the unifrac metrics; verify in Metric).
    :param istrmEnvr: Passed through to Metric.funcGetBetaMetric (presumably needed for the unifrac metrics; verify in Metric).
    :return List: List of selected sample names.
                  False is returned (and an error logged) if more samples are requested than exist
                  or if the distance matrix comes back as a boolean.
    """

    #Count of how many rows (samples)
    sampleCount = npaMatrix.shape[0]
    if iNumberSamplesReturned > sampleCount:
        logging.error("MicroPITA.funcGetCentralSamplesByKMedoids:: There are not enough samples to return the amount of samples specified. Return sample count = "+str(iNumberSamplesReturned)+". Sample number = "+str(sampleCount)+".")
        return False

    #If the cluster count is equal to the sample count return all samples (as a copy)
    if sampleCount == iNumberSamplesReturned:
        return list(lsSampleNames)

    #Get distance matrix
    #Either read it from the supplied file (converted with squareform) or calculate it from the abundances
    if istmBetaMatrix:
        distanceMatrix = scipy.spatial.distance.squareform(Metric.funcReadMatrixFile(istmMatrixFile=istmBetaMatrix,lsSampleOrder=lsSampleNames)[0])
    else:
        distanceMatrix = Metric.funcGetBetaMetric(npadAbundancies=npaMatrix, sMetric=sMetric, istrmTree=istrmTree, istrmEnvr=istrmEnvr, lsSampleOrder=lsSampleNames)

    #A boolean here is treated as a failure to produce the matrix
    #isinstance(..., bool) is the same check as types.BooleanType but does not depend on the python 2 only name
    if isinstance(distanceMatrix, bool):
        logging.error("MicroPITA.funcGetCentralSamplesByKMedoids:: Could not read in the supplied distance matrix, returning false.")
        return False

    # Handle unifrac output
    #For the unifrac metrics the matrix is the first element of what was returned
    if sMetric in [Metric.c_strUnifracUnweighted,Metric.c_strUnifracWeighted]:
        distanceMatrix = distanceMatrix[0]

    #Log distance matrix
    logging.debug("MicroPITA.funcGetCentralSamplesByKMedoids:: Distance matrix for representative selection using metric="+str(sMetric))

    distance = MLPYDistanceAdaptor(npaDistanceMatrix=distanceMatrix, fIsCondensedMatrix=True)

    #Create object to determine clusters/medoids
    medoidsMaker = Kmedoids(k=iNumberSamplesReturned, dist=distance)
    #medoidsData includes(1d numpy array, medoids indexes;
    #                     1d numpy array, non-medoids indexes;
    #                     1d numpy array, cluster membership for non-medoids;
    #                     double, cost of configuration)
    #npaMatrix is samples x rows
    #Build a matrix of lists of indicies to pass to the distance matrix
    #Each "observation" given to the clustering is just the index of a sample
    lliIndicesMatrix = [[iIndexPosition] for iIndexPosition in range(len(npaMatrix))]
    medoidsData = medoidsMaker.compute(np.array(lliIndicesMatrix))
    logging.debug("MicroPITA.funcGetCentralSamplesByKMedoids:: Results from the kmedoid method in representative selection:")
    logging.debug(str(medoidsData))

    #Return centroids
    #Translate the medoid indices to sample names
    selectedIndexes = medoidsData[0]
    return [lsSampleNames[selectedIndexes[iMedoid]] for iMedoid in range(iNumberSamplesReturned)]
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
190
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
191 ####Group 3## Highest Dissimilarity
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
192 #Testing: Happy path tested
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
193 def funcSelectExtremeSamplesFromHClust(self, strBetaMetric, npaAbundanceMatrix, lsSampleNames, iSelectSampleCount, istmBetaMatrix=None, istrmTree=None, istrmEnvr=None):
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
194 """
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
195 Select extreme samples from HClustering.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
196
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
197 :param strBetaMetric: The beta metric to use for distance matrix generation.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
198 :type: String The name of the beta metric to use.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
199 :param npaAbundanceMatrix: Numpy array where row=samples and columns=features.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
200 :type: Numpy Array Abundance data.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
201 :param lsSampleNames: The names of the sample.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
202 :type: List List of strings.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
203 :param iSelectSampleCount: Number of samples to select (return).
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
204 :type: Integer Integer number of samples returned.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
205 :return Samples: List of samples.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
206 :param istmBetaMatrix: File with beta-diversity matrix
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
207 :type: File stream or file path string
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
208 """
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
209
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
210 #If they want all the sample count, return all sample names
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
211 iSampleCount=len(npaAbundanceMatrix[:,0])
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
212 if iSelectSampleCount==iSampleCount:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
213 return lsSampleNames
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
214
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
215 #Holds the samples to be returned
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
216 lsReturnSamplesRet = []
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
217
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
218 #Generate beta matrix
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
219 #Returns condensed matrix
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
220 tempDistanceMatrix = scipy.spatial.distance.squareform(Metric.funcReadMatrixFile(istmMatrixFile=istmBetaMatrix,lsSampleOrder=lsSampleNames)[0]) if istmBetaMatrix else Metric.funcGetBetaMetric(npadAbundancies=npaAbundanceMatrix, sMetric=strBetaMetric, istrmTree=istrmTree, istrmEnvr=istrmEnvr, lsSampleOrder=lsSampleNames, fAdditiveInverse = True)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
221
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
222 if strBetaMetric in [Metric.c_strUnifracUnweighted,Metric.c_strUnifracWeighted]:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
223 tempDistanceMatrix = tempDistanceMatrix[0]
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
224
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
225 if type(tempDistanceMatrix) is BooleanType:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
226 logging.error("MicroPITA.funcSelectExtremeSamplesFromHClust:: Could not read in the supplied distance matrix, returning false.")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
227 return False
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
228
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
229 if istmBetaMatrix:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
230 tempDistanceMatrix = 1-tempDistanceMatrix
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
231
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
232 #Feed beta matrix to linkage to cluster
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
233 #Send condensed matrix
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
234 linkageMatrix = hcluster.linkage(tempDistanceMatrix, method=self.c_strHierarchicalClusterMethod)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
235
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
236 #Extract cluster information from dendrogram
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
237 #The linkage matrix is of the form
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
238 #[[int1 int2 double int3],...]
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
239 #int1 and int2 are the paired samples indexed at 0 and up.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
240 #each list is an entry for a branch that is number starting with the first
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
241 #list being sample count index + 1
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
242 #each list is then named by an increment as they appear
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
243 #this means that if a number is in the list and is = sample count or greater it is not
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
244 #terminal and is instead a branch.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
245 #This method just takes the lowest metric measurement (highest distance pairs/clusters)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
246 #Works much better than the original technique
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
247 #get total number of samples
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
248
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
249 iCurrentSelectCount = 0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
250 for row in linkageMatrix:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
251 #Get nodes of the lowest pairing (so the furthest apart pair)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
252 iNode1 = int(row[0])
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
253 iNode2 = int(row[1])
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
254 #Make sure the nodes are a terminal node (sample) and not a branch in the dendrogram
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
255 #The branching in the dendrogram will start at the number of samples and increment higher.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
256 #Add each of the pair one at a time breaking when enough samples are selected.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
257 if iNode1<iSampleCount:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
258 lsReturnSamplesRet.append(lsSampleNames[iNode1])
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
259 iCurrentSelectCount = iCurrentSelectCount + 1
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
260 if iCurrentSelectCount == iSelectSampleCount:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
261 break
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
262 if iNode2<iSampleCount:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
263 lsReturnSamplesRet.append(lsSampleNames[iNode2])
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
264 iCurrentSelectCount = iCurrentSelectCount + 1
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
265 if iCurrentSelectCount == iSelectSampleCount:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
266 break
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
267
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
268 #Return selected samples
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
269 return lsReturnSamplesRet
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
270
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
271 ####Group 4## Rank Average of user Defined Taxa
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
272 #Testing: Happy Path Tested
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
def funcGetAverageAbundanceSamples(self, abndTable, lsTargetedFeature, fRank=False):
    """
    Averages the abundance (or the ranked abundance) of the targeted features per sample
    and returns one sorted entry per sample.

    :param abndTable: Abundance Table to analyse
    :type: AbundanceTable Abundance Table
    :param lsTargetedFeature: Names of the features (bugs) to average
    :type: list list of strings
    :param fRank: Indicates to rank the abundance before averaging the features (default false)
    :type: boolean False = no ranking
    :return List of lists or boolean: False on error, otherwise one internal list per sample.
        Not ranked: [sample, -1, average abundance], sorted by average abundance, highest first.
        Ranked:     [sample, average rank, average abundance], sorted by average rank, lowest first.
                    When ConstantsMicropita.c_fBreakRankTiesByDiversity is set, samples with equal
                    rank are ordered by ascending average abundance.
    """

    llAbundance = abndTable.funcGetAverageAbundancePerSample(lsTargetedFeature)
    if not llAbundance:
        logging.error("MicroPITA.funcGetAverageAbundanceSamples:: Could not get average abundance, returned false. Make sure the features (bugs) are spelled correctly and in the abundance table.")
        return False

    #Reserve the middle slot for the rank so both modes share one row layout.
    #Not ranked rows stay [sSample, -1, average abundance]
    #(the constant -1 can not discriminate ties if used in later functions, so this generalizes)
    #Ranked rows become [sSample, average rank, average abundance]
    llRetAbundance = [[llist[0], -1, llist[1]] for llist in llAbundance]

    #Rank if needed
    if fRank:
        abndRanked = abndTable.funcRankAbundance()
        #Identity test: a table object could define its own equality
        if abndRanked is None:
            logging.error("MicroPITA.funcGetAverageAbundanceSamples:: Could not rank the abundance table, returned false.")
            return False
        llRetRank = abndRanked.funcGetAverageAbundancePerSample(lsTargetedFeature)
        if not llRetRank:
            logging.error("MicroPITA.funcGetAverageAbundanceSamples:: Could not get average ranked abundance, returned false. Make sure the features (bugs) are spelled correctly and in the abundance table.")
            return False
        dictRanks = dict(llRetRank)
        llRetAbundance = [[a[0], dictRanks[a[0]], a[2]] for a in llRetAbundance]

    #Sort first for ties and then for the main feature (sorted is stable, so the
    #abundance order survives among entries that share a rank)
    #NOTE(review): in ranked mode the tie-break sort is ascending, so among equal ranks
    #the lowest abundance comes first - confirm this is the intended tie-break direction.
    if not fRank or ConstantsMicropita.c_fBreakRankTiesByDiversity:
        llRetAbundance = sorted(llRetAbundance, key=lambda sampleData: sampleData[2], reverse=not fRank)
    if fRank:
        llRetAbundance = sorted(llRetAbundance, key=lambda sampleData: sampleData[1])
    return llRetAbundance
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
318
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
319 #Testing: Happy Path Tested
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
def funcSelectTargetedTaxaSamples(self, abndMatrix, lsTargetedTaxa, iSampleSelectionCount, sMethod = ConstantsMicropita.lsTargetedFeatureMethodValues[0]):
    """
    Selects the first iSampleSelectionCount samples from the ordering produced by
    funcGetAverageAbundanceSamples for the targeted features.
    Ranked averaging is used when sMethod matches self.c_strTargetedRanked (case insensitive),
    otherwise plain abundance averaging is used.

    :param abndMatrix: Abundance table to analyse
    :type: AbundanceTable Abundance table
    :param lsTargetedTaxa: List of features
    :type: list list of strings
    :param iSampleSelectionCount: Number of samples to select
    :type: integer integer
    :param sMethod: Method to select targeted features
    :type: string String (Can be values found in ConstantsMicropita.lsTargetedFeatureMethodValues)
    :return List of strings: List of sample names which were selected.
        An empty list is returned on an error.
    """

    #Check data (None is treated like an empty feature list)
    if lsTargetedTaxa is None or len(lsTargetedTaxa) < 1:
        logging.error("MicroPITA.funcSelectTargetedTaxaSamples. Taxa defined selection was requested but no features were given.")
        return []

    fRanked = sMethod.lower() == self.c_strTargetedRanked.lower()
    lsTargetedSamples = self.funcGetAverageAbundanceSamples(abndTable=abndMatrix, lsTargetedFeature=lsTargetedTaxa, fRank=fRanked)

    #The helper signals failure by returning False
    if lsTargetedSamples is False:
        logging.error("MicroPITA.funcSelectTargetedTaxaSamples:: Was not able to select for the features given. So targeted feature selection was not performed. Check to make sure the features are spelled correctly and exist in the abundance file.")
        return []

    #Select from results (entries are already sorted, the sample name is element 0)
    return [sSample[0] for sSample in lsTargetedSamples[:iSampleSelectionCount]]
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
351
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
352 ####Group 5## Random
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
353 #Testing: Happy path Tested
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
def funcGetRandomSamples(self, lsSamples=None, iNumberOfSamplesToReturn=0):
    """
    Returns random sample names of the number given. No replacement.
    The selected names are returned in the order they appear in lsSamples.

    :param lsSamples: List of sample names (None is treated as no samples)
    :type: list list of strings
    :param iNumberOfSamplesToReturn: Number of samples to select
    :type: integer integer.
    :return List: List of selected samples (strings).
        When the requested count is at least the number of samples, lsSamples itself is returned.
    """

    #The default of None means there is nothing to select from
    if lsSamples is None:
        return []

    #Input sample count
    sampleCount = len(lsSamples)

    #Return every sample if they ask for at least as many as exist
    if iNumberOfSamplesToReturn >= sampleCount:
        return lsSamples

    #Get the random indices for the sample (without replacement)
    #Kept in a set so the membership test below is constant time
    setRandomIndices = set(random.sample(range(sampleCount), iNumberOfSamplesToReturn))

    #Keep only the samples whose index was drawn
    return [sSample for iIndex, sSample in enumerate(lsSamples) if iIndex in setRandomIndices]
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
377
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
378 #Happy path tested (case 3)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
def funcGetAveragePopulation(self, abndTable, lfCompress):
    """
    Averages every row of the abundance table over the selected samples.

    :param abndTable: AbundanceTable of data to be averaged
    :type: AbundanceTable
    :param lfCompress: One flag per sample (false means to remove the sample before averaging)
    :type: List of booleans
    :return List of doubles: One average per row returned by abndTable.funcGetAbundanceCopy(),
        or an empty list when no sample is selected.
    """
    #Nothing selected, nothing to average.
    #any() follows the same truthiness np.compress applies to the flags,
    #a sum of the flags could cancel to 0 for non-boolean flags.
    if not any(lfCompress):
        return []

    #Get the average populations
    lAverageRet = []

    for row in abndTable.funcGetAbundanceCopy():
        #Element 0 of each row is dropped (presumably the feature id - confirm against AbundanceTable)
        ldSelected = np.compress(lfCompress, list(row)[1:], axis=0)
        lAverageRet.append(sum(ldSelected)/float(len(ldSelected)))
    return lAverageRet
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
400
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
401 #Happy path tested (2 cases)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
def funcGetDistanceFromAverage(self, abndTable,ldAverage,lsSamples,lfSelected):
    """
    Measures how far each selected sample is from an average sample.

    For every sample name in lsSamples whose flag in lfSelected (same position) is true,
    the sample is fetched with abndTable.funcGetSample, paired with ldAverage and handed to
    Metric.funcGetBrayCurtisDissimilarity; element 0 of that result is kept.

    :param abndTable: Abundance table holding the data to be analyzed.
    :type: AbundanceTable
    :param ldAverage: Average population (Average features of the abundance table of samples)
    :type: List of doubles which represent the average population
    :param lsSamples: Candidate sample ids, in the order matching lfSelected
    :type: List of strings (sample ids)
    :param lfSelected: Samples to be included in the analysis
    :type: List of boolean (true means include)
    :return: List of distances (doubles), one per included sample, in lsSamples order
    """
    return [
        Metric.funcGetBrayCurtisDissimilarity(np.array([abndTable.funcGetSample(sName), ldAverage]))[0]
        for iPosition, sName in enumerate(lsSamples)
        if lfSelected[iPosition]
    ]
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
427
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
428 #Happy path tested (1 case)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
def funcMeasureDistanceFromLabelToAverageOtherLabel(self, abndTable, lfGroupOfInterest, lfGroupOther):
    """
    Measure how far each sample in the group of interest lies from the average sample of another group.
    Note: This assumes 2 classes.

    :param abndTable: Table of data to work out of.
    :type: AbundanceTable
    :param lfGroupOfInterest: Per-sample indicator, true when the sample belongs to the group of interest.
    :type: List of booleans, in the same order as the sample names of the abundance table.
    :param lfGroupOther: Per-sample indicator, true when the sample belongs to the other group.
    :type: List of booleans, in the same order as the sample names of the abundance table.
    :return: Pairs of (string sample name, distance), one per sample flagged in lfGroupOfInterest,
             as produced by zip.
    """
    #Names of every sample in the table, positions line up with the boolean indicators
    lsSampleNames = abndTable.funcGetSampleNames()

    #Average sample of the other group
    ldOtherCentroid = self.funcGetAveragePopulation(abndTable=abndTable, lfCompress=lfGroupOther)

    #Distance of each sample of interest from that average
    ldDistances = self.funcGetDistanceFromAverage(abndTable=abndTable, ldAverage=ldOtherCentroid,
                                                  lsSamples=lsSampleNames, lfSelected=lfGroupOfInterest)

    #Collect the names of the samples of interest so they can be paired with their distances
    lsNamesOfInterest = []
    for iPosition, fInGroup in enumerate(lfGroupOfInterest):
        if fInGroup:
            lsNamesOfInterest.append(lsSampleNames[iPosition])

    return zip(lsNamesOfInterest, ldDistances)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
453
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
454 #Happy path tested (1 test case)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
def funcPerformDistanceSelection(self, abndTable, iSelectionCount, sLabel, sValueOfInterest):
    """
    Select the samples of one metadata group that lie closest to and farthest from the other groups.

    Each sample whose sLabel metadata equals sValueOfInterest is measured against the average
    (centroid) sample of every other metadata value. The distances are averaged over those other
    values, sorted, and iSelectionCount samples are taken from each end of the sorted list.

    :param abndTable: Abundance of measurements
    :type: AbundanceTable
    :param iSelectionCount: The number of samples to take from each end of the sorted distances.
                            A count of 0 or less selects nothing.
    :type: Integer
    :param sLabel: ID of the metadata which is the supervised label
    :type: String
    :param sValueOfInterest: Metadata value in the sLabel metadata row of the abundance table which defines the group of interest.
    :type: String found in the abundance table metadata row indicated by sLabel.
    :return: List of 3 lists of tuples (samplename, distance):
             [[iSelectionCount tuples closest to the other centroids],
              [iSelectionCount tuples farthest from the other centroids],
              [all tuples of samples not selected]]
             The first two lists can overlap when fewer than 2*iSelectionCount samples are available.
    """
    lsMetadata = abndTable.funcGetMetadata(sLabel)

    #Other metadata values
    #The value of interest is wrapped in a list so it is removed as one whole string;
    #set(sValueOfInterest) would be the set of its characters and would leave the value in place
    lsUniqueOtherValues = list(set(lsMetadata) - set([sValueOfInterest]))

    #Get boolean indicator of values of interest
    lfLabelsInterested = [sValueOfInterest == sValue for sValue in lsMetadata]

    #Sum, per sample of interest, the distances to the centroid of each other metadata value
    dictDistanceSums = {}
    for sOtherLabel in lsUniqueOtherValues:
        #Get boolean indicator of labels not of interest
        lfLabelsOther = [sOtherLabel == sValue for sValue in lsMetadata]

        #Distances of the samples of interest to the average of this other group
        dictValueDistances = dict(self.funcMeasureDistanceFromLabelToAverageOtherLabel(abndTable, lfLabelsInterested, lfLabelsOther))

        for sKey, dDistance in dictValueDistances.items():
            dictDistanceSums[sKey] = dictDistanceSums.get(sKey, 0.0) + dDistance

    #Finish average by dividing by the number of other values, then sort to extract extremes
    #(no division happens when there are no other values because there are no sums either)
    dOtherValueCount = float(len(lsUniqueOtherValues))
    ltpleAverageDistances = sorted([(sKey, dSum / dOtherValueCount) for sKey, dSum in dictDistanceSums.items()],
                                   key=operator.itemgetter(1))

    #Get the closest and farthest distances
    #A slice of [-0:] would be the whole list, so a non-positive count is handled explicitly
    iCount = max(iSelectionCount, 0)
    ltupleDiscriminantSamples = ltpleAverageDistances[:iCount]
    ltupleDistinctSamples = ltpleAverageDistances[-iCount:] if iCount else []

    #Remove the selected samples from the larger population of distances (better visualization)
    setSelected = set(tpleSelected[0] for tpleSelected in ltupleDiscriminantSamples + ltupleDistinctSamples)

    #Return discriminant tuples, distinct tuples, other tuples
    return [ltupleDiscriminantSamples, ltupleDistinctSamples,
            [tplData for tplData in ltpleAverageDistances if tplData[0] not in setSelected]]
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
506
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
507 #Run the supervised method surrounding distance from centroids
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
508 #Happy path tested (3 test cases)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
def funcRunSupervisedDistancesFromCentroids(self, abundanceTable, fRunDistinct, fRunDiscriminant,
                                            xOutputSupFile, xPredictSupFile, strSupervisedMetadata,
                                            iSampleSupSelectionCount, lsOriginalSampleNames, lsOriginalLabels, fAppendFiles = False):
    """
    Run the supervised selection that measures samples of one label against the centroids of the other labels.
    Every unique value of the supervised metadata is treated as its own group.

    :param abundanceTable: Data to analyze
    :type: AbundanceTable
    :param fRunDistinct: Run distinct selection method
    :type: Boolean (true runs method)
    :param fRunDiscriminant: Run discriminant method
    :type: Boolean (true runs method)
    :param xOutputSupFile: File output from supervised methods detailing data going into the method.
                           Nothing is written when this is empty.
    :type: String or FileStream
    :param xPredictSupFile: File output holding the distance results from the supervised methods.
                            Nothing is written when this is empty.
    :type: String or FileStream
    :param strSupervisedMetadata: The metadata that will be used to group samples.
    :type: String
    :param iSampleSupSelectionCount: Number of samples to select
    :type: Integer
    :param lsOriginalSampleNames: List of the sample names, order is important and should be preserved from the abundanceTable.
    :type: List of samples
    :param lsOriginalLabels: Labels handed on to the SVM file helpers as lsOriginalLabels.
    :type: List
    :param fAppendFiles: Indicates that output files already exist and appending is occurring.
    :type: Boolean
    :return Selected Samples: A dictionary of selected samples by selection ID
        Dictionary {"Selection Method":["SampleID","SampleID"...]}
    """
    #Run each metadata value as the group of interest against all the other values
    dictSelectionsByValue = {}
    for sValue in set(abundanceTable.funcGetMetadata(strSupervisedMetadata)):
        lltpleSelection = self.funcPerformDistanceSelection(abndTable=abundanceTable,
            iSelectionCount=iSampleSupSelectionCount, sLabel=strSupervisedMetadata, sValueOfInterest=sValue)
        dictSelectionsByValue.setdefault(sValue, []).extend(lltpleSelection)

    #Make expected output files for supervised methods
    #1. Output file which is similar to an input file for SVMs
    #2. Output file that is similar to the probabilistic output of a SVM (LibSVM)
    #Mainly for making output of supervised methods (Distance from Centroid) similar
    #MicropitaVis needs some of these files
    if xOutputSupFile:
        if not fAppendFiles:
            SVM.funcConvertAbundanceTableToSVMFile(abndAbundanceTable=abundanceTable, xOutputSVMFile=xOutputSupFile,
                sMetadataLabel=strSupervisedMetadata, lsOriginalLabels=lsOriginalLabels, lsSampleOrdering=lsOriginalSampleNames)
        else:
            SVM.funcUpdateSVMFileWithAbundanceTable(abndAbundanceTable=abundanceTable, xOutputSVMFile=xOutputSupFile,
                lsOriginalLabels=lsOriginalLabels, lsSampleOrdering=lsOriginalSampleNames)

    #Samples to return, accumulated over every group because one or both methods may be active
    #Index 0 of a selection holds the closest (discriminant) tuples, index 1 the farthest (distinct) tuples
    dictSelectedSamplesRet = {}
    for lltpleGroups in dictSelectionsByValue.values():
        if fRunDistinct:
            lsDistinct = dictSelectedSamplesRet.setdefault(ConstantsMicropita.c_strDistinct, [])
            lsDistinct.extend([tple[0] for tple in lltpleGroups[1]])
        if fRunDiscriminant:
            lsDiscriminant = dictSelectedSamplesRet.setdefault(ConstantsMicropita.c_strDiscriminant, [])
            lsDiscriminant.extend([tple[0] for tple in lltpleGroups[0]])

    if xPredictSupFile:
        #Flatten the three lists of each group into one list of tuples per group
        dictFlattenedDistances = {}
        for sValue, lltpleGroups in dictSelectionsByValue.items():
            for ltpleGroup in lltpleGroups:
                for tple in ltpleGroup:
                    dictFlattenedDistances.setdefault(sValue, []).append(tple)

        #Appending merges with the existing predict file, otherwise the file is written fresh
        funcWritePredict = self._updatePredictFile if fAppendFiles else self._writeToPredictFile
        funcWritePredict(xPredictSupFile=xPredictSupFile, xInputLabelsFile=xOutputSupFile,
            dictltpleDistanceMeasurements=dictFlattenedDistances, abundanceTable=abundanceTable, lsOriginalSampleNames=lsOriginalSampleNames)

    return dictSelectedSamplesRet
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
579
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
580 #Two happy path test cases
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
def _updatePredictFile(self, xPredictSupFile, xInputLabelsFile, dictltpleDistanceMeasurements, abundanceTable, lsOriginalSampleNames):
    """
    Manages updating the predict file.

    The existing predict file is read, its distances are merged with the new measurements
    (new measurements win when a sample appears in both for the same label group) and the
    merged result is handed to _writeToPredictFile.

    :param xPredictSupFile: File that has predictions (distances) from the supervised method.
                            A stream is closed and reopened by its name.
    :type: FileStream or String file path
    :param xInputLabelsFile: File that was input to the supervised methods.
    :type: FileStream or String file path
    :param dictltpleDistanceMeasurements: The new distance measurements.
    :type: Dictionary of lists of tuples {"labelgroup":[("SampleName",dDistance)...], "labelgroup":[("SampleName",dDistance)...]}
    :param abundanceTable: An abundance table of the sample data, passed through to the writer.
    :type: AbundanceTable
    :param lsOriginalSampleNames: Sample names, row i of the existing file is attributed to lsOriginalSampleNames[i].
    :type: List of strings
    """

    #Work from the file path so the file can be read here and rewritten by the writer
    if not isinstance(xPredictSupFile, str):
        xPredictSupFile.close()
        xPredictSupFile = xPredictSupFile.name

    #Read the existing data in, the handle is closed before the same path is reopened for writing
    with open(xPredictSupFile,'r') as hndlPredict:
        csvrPredict = csv.reader(hndlPredict, delimiter=ConstantsBreadCrumbs.c_strBreadCrumbsSVMSpace)

        #First column of the header is skipped, the remaining columns name the label groups
        lsHeader = next(csvrPredict)[1:]
        dictlltpleRead = dict([(sHeader,[]) for sHeader in lsHeader])

        for iSampleIndex, lsRow in enumerate(csvrPredict):
            for iDistanceIndex, sDistance in enumerate(lsRow[1:]):
                #Values are kept as read from the file; entries marked empty are skipped
                if sDistance != ConstantsMicropita.c_sEmptyPredictFileValue:
                    dictlltpleRead[lsHeader[iDistanceIndex]].append((lsOriginalSampleNames[iSampleIndex], sDistance))

    #Combine dictltpleDistanceMeasurements with the data read from the file
    #If they share a key then merge keeping parameter data
    #If they do not share the key, keep the full data
    dictNew = {}
    for sKey, ltpleMeasured in dictltpleDistanceMeasurements.items():
        if sKey in dictlltpleRead:
            setMeasuredSamples = set(tple[0] for tple in ltpleMeasured)
            dictNew[sKey] = ltpleMeasured + [tple for tple in dictlltpleRead[sKey] if tple[0] not in setMeasuredSamples]
        else:
            dictNew[sKey] = ltpleMeasured
    for sKey, ltpleRead in dictlltpleRead.items():
        if sKey not in dictltpleDistanceMeasurements:
            dictNew[sKey] = ltpleRead

    #Call writer
    self._writeToPredictFile(xPredictSupFile=xPredictSupFile, xInputLabelsFile=xInputLabelsFile,
        dictltpleDistanceMeasurements=dictNew, abundanceTable=abundanceTable,
        lsOriginalSampleNames=lsOriginalSampleNames, fFromUpdate=True)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
625
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
626 #2 happy path test cases
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
def _writeToPredictFile(self, xPredictSupFile, xInputLabelsFile, dictltpleDistanceMeasurements, abundanceTable, lsOriginalSampleNames, fFromUpdate=False):
    """
    Write the predict file: a header row ("labels" followed by one column per
    label group) and then one row per sample holding the sample's label and its
    distance measurement for each label group.

    :param xPredictSupFile: File that has predictions (distances) from the supervised method.
                            When a path is given the file is opened for writing and closed
                            here; when a stream is given it is written to and left open.
    :type: FileStream or String file path
    :param xInputLabelsFile: File that was the input to the supervised methods (labels are read from it).
    :type: FileStream or String file path
    :param dictltpleDistanceMeasurements: Distance measurements per label group. Not modified by this method.
    :type: Dictionary of lists of tuples {"labelgroup":[("SampleName",dDistance)...], "labelgroup":[("SampleName",dDistance)...]}
    :param abundanceTable: An abundance table of the sample data.
    :type: AbundanceTable
    :param lsOriginalSampleNames: Sample names; one row is written per name in this list, in this order.
                                  When fFromUpdate is true this list is also the sample list handed to the
                                  labels reader, otherwise the abundance table's sample names are handed to it.
    :type: List of strings
    :param fFromUpdate: Indicates if this is part of an update to the file or not.
    :type: Boolean
    """

    #The labels reader is given a path, so use the stream's name when handed a stream
    xInputLabelsFileName = xInputLabelsFile if isinstance(xInputLabelsFile,str) else xInputLabelsFile.name

    #Only close the output when it was opened here; a caller supplied stream stays open
    fOpenedHere = isinstance(xPredictSupFile, str)
    ostmPredict = open(xPredictSupFile,"w") if fOpenedHere else xPredictSupFile
    try:
        f = csv.writer(ostmPredict, delimiter=ConstantsBreadCrumbs.c_strBreadCrumbsSVMSpace)

        lsAllSampleNames = abundanceTable.funcGetSampleNames()
        lsLabels = SVM.funcReadLabelsFromFile(xSVMFile=xInputLabelsFileName,
                                              lsAllSampleNames=lsOriginalSampleNames if fFromUpdate else lsAllSampleNames,
                                              isPredictFile=False)
        #Invert {label:[samples]} to {sample:label}
        dictLabels = dict((sSample, sLabel) for sLabel in lsLabels for sSample in lsLabels[sLabel])

        #Dictionary keys will be used to order the predict file
        #(materialized as a list so it can be concatenated and reused for every row)
        lsMeasurementKeys = list(dictltpleDistanceMeasurements.keys())

        #Make header
        f.writerow(["labels"]+lsMeasurementKeys)

        #Index the measurements by sample name in a local structure,
        #leaving the caller's dictionary untouched
        dictDistancesBySample = dict((sKey, dict(dictltpleDistanceMeasurements[sKey])) for sKey in lsMeasurementKeys)

        #Make body of file; anything missing is written as the empty predict file value
        sEmpty = ConstantsMicropita.c_sEmptyPredictFileValue
        for sSample in lsOriginalSampleNames:
            f.writerow([dictLabels.get(sSample, sEmpty)]+
                       [str(dictDistancesBySample[sKey].get(sSample, sEmpty)) for sKey in lsMeasurementKeys])
    finally:
        if fOpenedHere:
            ostmPredict.close()
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
670
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
def _funcRunNormalizeSensitiveMethods(self, abndData, iSampleSelectionCount, dictSelectedSamples, lsAlphaMetrics, lsBetaMetrics, lsInverseBetaMetrics,
                                      fRunDiversity, fRunRepresentative, fRunExtreme, strAlphaMetadata=None,
                                      istmBetaMatrix=None, istrmTree=None, istrmEnvr=None, fInvertDiversity=False):
    """
    Manages running methods that are sensitive to normalization. This is called twice, once for the set of methods which should not be normalized and the other
    for the set that should be normalized.

    :param abndData: Abundance table object holding the samples to be measured.
    :type: AbundanceTable
    :param iSampleSelectionCount: The number of samples to select per method.
    :type: Integer
    :param dictSelectedSamples: Will be added to (in place) as samples are selected {"Method":["strSelectedSampleID","strSelectedSampleID"...]}.
    :type: Dictionary
    :param lsAlphaMetrics: List of alpha metrics to use on alpha metric dependent assays (like highest diversity).
                           Ignored when strAlphaMetadata is given.
    :type: List of strings
    :param lsBetaMetrics: List of beta metrics to use on beta metric dependent assays (like most representative).
                          Ignored when istmBetaMatrix is given.
    :type: List of strings
    :param lsInverseBetaMetrics: List of inverse beta metrics to use on inverse beta metric dependent assays (like most dissimilar).
                                 Ignored when istmBetaMatrix is given.
    :type: List of strings
    :param fRunDiversity: Run Diversity based methods (true indicates run).
    :type: Boolean
    :param fRunRepresentative: Run Representative based methods (true indicates run).
    :type: Boolean
    :param fRunExtreme: Run Extreme based methods (true indicates run).
    :type: Boolean
    :param strAlphaMetadata: Metadata id whose values (converted to float) are used as the diversity
                             measurement instead of calculating the metrics in lsAlphaMetrics.
    :type: String
    :param istmBetaMatrix: File that has a precalculated beta matrix. When given, representative and
                           extreme selection are run once with the custom metric.
    :type: File stream or File path string
    :param istrmTree: Tree file, passed through to the representative and extreme selection methods.
    :type: File stream or File path string
    :param istrmEnvr: Environment file, passed through to the representative and extreme selection methods.
    :type: File stream or File path string
    :param fInvertDiversity: When true each diversity measurement is replaced by its reciprocal before ranking.
    :type: Boolean
    :return Selected Samples: Samples selected by methods (the dictSelectedSamples object that was passed in).
            Dictionary {"Selection Method":["SampleID","SampleID","SampleID",...]}
    """

    #Sample ids/names
    lsSampleNames = abndData.funcGetSampleNames()

    #Generate alpha metrics and get most diverse
    if fRunDiversity:

        #Names of the techniques, one per row of the alpha matrix
        lsMethodNames = [strAlphaMetadata] if strAlphaMetadata else lsAlphaMetrics

        #If given an alpha-diversity metadata
        if strAlphaMetadata:
            internalAlphaMatrix = [[float(strNum) for strNum in abndData.funcGetMetadata(strAlphaMetadata)]]
        else:
            #A summed table is reduced to its terminal nodes before measuring diversity
            abndAlphaSource = abndData if not abndData.funcIsSummed() else abndData.funcGetFeatureAbundanceTable(abndData.funcGetTerminalNodes())
            #Expects Observations (Taxa (row) x sample (column))
            #Returns [[metric1-sample1, metric1-sample2, metric1-sample3],[metric2-sample1, metric2-sample2, metric2-sample3]]
            internalAlphaMatrix = Metric.funcBuildAlphaMetricsMatrix(npaSampleAbundance = abndAlphaSource.funcGetAbundanceCopy(),
                                                                     lsSampleNames = lsSampleNames, lsDiversityMetricAlpha = lsAlphaMetrics)

        if internalAlphaMatrix:
            #Invert measurements
            if fInvertDiversity:
                #The value is floored at c_smallNumber to avoid dividing by zero.
                #1.0 forces true division: under Python 2, 1/<int> truncates to 0.
                internalAlphaMatrix = [[1.0/max(dValue,ConstantsMicropita.c_smallNumber) for dValue in lsLine]
                                       for lsLine in internalAlphaMatrix]

            #Get top ranked alpha diversity by most diverse
            #Expects [[sample1,sample2,sample3...],[sample1,sample2,sample3..],...]
            #Returns [[sampleName1, sampleName2, sampleNameN],[sampleName1, sampleName2, sampleNameN]]
            mostDiverseAlphaSamplesIndexes = self.funcGetTopRankedSamples(lldMatrix=internalAlphaMatrix, lsSampleNames=lsSampleNames, iTopAmount=iSampleSelectionCount)

            #Add to results
            for index, strMetric in enumerate(lsMethodNames):
                strSelectionMethod = self.dictConvertAMetricDiversity.get(strMetric,ConstantsMicropita.c_strDiversity+"="+strMetric)
                dictSelectedSamples.setdefault(strSelectionMethod,[]).extend(mostDiverseAlphaSamplesIndexes[index])

    logging.info("MicroPITA.funcRunNormalizeSensitiveMethods:: Selected Samples 1b")
    logging.info(dictSelectedSamples)

    #Generate beta metrics and select representative / extreme samples
    if fRunRepresentative or fRunExtreme:

        #Abundance matrix transposed
        npaTransposedAbundance = UtilityMath.funcTransposeDataMatrix(abndData.funcGetAbundanceCopy(), fRemoveAdornments=True)

        #Get center selection using clusters/tiling
        #This will be for beta metrics in normalized space
        if fRunRepresentative:

            if istmBetaMatrix:
                #Get representative dissimilarity samples
                medoidSamples=self.funcGetCentralSamplesByKMedoids(npaMatrix=npaTransposedAbundance, sMetric=ConstantsMicropita.c_custom, lsSampleNames=lsSampleNames, iNumberSamplesReturned=iSampleSelectionCount, istmBetaMatrix=istmBetaMatrix, istrmTree=istrmTree, istrmEnvr=istrmEnvr)

                if medoidSamples:
                    dictSelectedSamples.setdefault(ConstantsMicropita.c_strRepresentative+"="+ConstantsMicropita.c_custom,[]).extend(medoidSamples)
            else:
                logging.info("MicroPITA.funcRunNormalizeSensitiveMethods:: Performing representative selection on normalized data.")
                for bMetric in lsBetaMetrics:

                    #Get representative dissimilarity samples
                    medoidSamples=self.funcGetCentralSamplesByKMedoids(npaMatrix=npaTransposedAbundance, sMetric=bMetric, lsSampleNames=lsSampleNames, iNumberSamplesReturned=iSampleSelectionCount, istmBetaMatrix=istmBetaMatrix, istrmTree=istrmTree, istrmEnvr=istrmEnvr)

                    if medoidSamples:
                        dictSelectedSamples.setdefault(self.dictConvertBMetricToMethod.get(bMetric,ConstantsMicropita.c_strRepresentative+"="+bMetric),[]).extend(medoidSamples)

        #Get extreme selection using clusters, tiling
        if fRunExtreme:
            logging.info("MicroPITA.funcRunNormalizeSensitiveMethods:: Performing extreme selection on normalized data.")
            if istmBetaMatrix:

                #Samples for representative dissimilarity
                #This involves inverting the distance metric,
                #Taking the dendrogram level of where the number cluster == the number of samples to select
                #Returning a representative sample from each cluster
                extremeSamples = self.funcSelectExtremeSamplesFromHClust(strBetaMetric=ConstantsMicropita.c_custom, npaAbundanceMatrix=npaTransposedAbundance, lsSampleNames=lsSampleNames, iSelectSampleCount=iSampleSelectionCount, istmBetaMatrix=istmBetaMatrix, istrmTree=istrmTree, istrmEnvr=istrmEnvr)

                #Add selected samples
                if extremeSamples:
                    dictSelectedSamples.setdefault(ConstantsMicropita.c_strExtreme+"="+ConstantsMicropita.c_custom,[]).extend(extremeSamples)

            else:
                #Run the extreme selection once per inverse beta metric in normalized space
                for bMetric in lsInverseBetaMetrics:

                    #Samples for representative dissimilarity
                    #This involves inverting the distance metric,
                    #Taking the dendrogram level of where the number cluster == the number of samples to select
                    #Returning a representative sample from each cluster
                    extremeSamples = self.funcSelectExtremeSamplesFromHClust(strBetaMetric=bMetric, npaAbundanceMatrix=npaTransposedAbundance, lsSampleNames=lsSampleNames, iSelectSampleCount=iSampleSelectionCount, istmBetaMatrix=istmBetaMatrix, istrmTree=istrmTree, istrmEnvr=istrmEnvr)

                    #Add selected samples
                    if extremeSamples:
                        dictSelectedSamples.setdefault(self.dictConvertInvBMetricToMethod.get(bMetric,ConstantsMicropita.c_strExtreme+"="+bMetric),[]).extend(extremeSamples)

    logging.info("MicroPITA.funcRunNormalizeSensitiveMethods:: Selected Samples 2,3b")
    logging.info(dictSelectedSamples)
    return dictSelectedSamples
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
802
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
803 def funcRun(self, strIDName, strLastMetadataName, istmInput,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
804 ostmInputPredictFile, ostmPredictFile, ostmCheckedFile, ostmOutput,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
805 cDelimiter, cFeatureNameDelimiter, strFeatureSelection,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
806 istmFeatures, iCount, lstrMethods, strLastRowMetadata = None, strLabel = None, strStratify = None,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
807 strCustomAlpha = None, strCustomBeta = None, strAlphaMetadata = None, istmBetaMatrix = None, istrmTree = None, istrmEnvr = None,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
808 iMinSeqs = ConstantsMicropita.c_liOccurenceFilter[0], iMinSamples = ConstantsMicropita.c_liOccurenceFilter[1], fInvertDiversity = False):
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
809 """
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
810 Manages the selection of samples given different metrics.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
811
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
812 :param strIDName: Sample Id metadata row
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
813 :type: String
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
814 :param strLastMetadataName: The id of the metadata positioned last in the abundance table.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
815 :type: String String metadata id.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
816 :param istmInput: File to store input data to supervised methods.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
817 :type: FileStream of String file path
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
818 :param ostmInputPredictFile: File to store distances from supervised methods.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
819 :type: FileStream or String file path
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
820 :param ostmCheckedFile: File to store the AbundanceTable data after it is being checked.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
821 :type: FileStream or String file path
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
822 :param ostmOutPut: File to store sample selection by methods of interest.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
823 :type: FileStream or String file path
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
824 :param cDelimiter: Delimiter of abundance table.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
825 :type: Character Char (default TAB).
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
826 :param cFeatureNameDelimiter: Delimiter of the name of features (for instance if they contain consensus lineages indicating clades).
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
827 :type: Character (default |).
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
828 :param stFeatureSelectionMethod: Which method to use to select features in a targeted manner (Using average ranked abundance or average abundance).
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
829 :type: String (specific values indicated in ConstantsMicropita.lsTargetedFeatureMethodValues).
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
830 :param istmFeatures: File which holds the features of interest if using targeted feature methodology.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
831 :type: FileStream or String file path
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
832 :param iCount: Number of samples to select in each methods, supervised methods select this amount per label if possible.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
833 :type: Integer integer.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
834 :param lstrMethods: List of strings indicating selection techniques.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
835 :type: List of string method names
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
836 :param strLabel: The metadata used for supervised labels.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
837 :type: String
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
838 :param strStratify: The metadata used to stratify unsupervised data.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
839 :type: String
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
840 :param strCustomAlpha: Custom alpha diversity metric
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
841 :type: String
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
842 :param strCustomBeta: Custom beta diversity metric
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
843 :type: String
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
844 :param strAlphaMetadata: Metadata id which is a diversity metric to use in highest diversity sampling
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
845 :type: String
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
846 :param istmBetaMatrix: File containing precalculated beta-diversity matrix for representative sampling
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
847 :type: FileStream or String file path
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
848 :param istrmTree: File containing tree for phylogenetic beta-diversity analysis
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
849 :type: FileStream or String file path
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
850 :param istrmEnvr: File containing environment for phylogenetic beta-diversity analysis
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
851 :type: FileStream or String file path
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
852 :param iMinSeqs: Minimum sequence count for the occurrence filter, which removes every feature that does not have at least this many sequences in each of a minimum number of samples.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
853 :type: Integer
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
854 :param iMinSamples: Minimum sample count for the occurrence filter.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
855 :type: Integer
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
856 :param fInvertDiversity: When true will invert diversity measurements before using.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
857 :type: boolean
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
858 :return Selected Samples: Samples selected by methods.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
859 Dictionary {"Selection Method":["SampleID","SampleID","SampleID",...]}
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
860 """
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
861
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
862 #Holds the top ranked samples from different metrics
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
863 #dict[metric name] = [samplename,samplename...]
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
864 selectedSamples = dict()
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
865
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
866 #If a target feature file is given make sure that targeted feature is in the selection methods, if not add
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
867 if ConstantsMicropita.c_strFeature in lstrMethods:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
868 if not istmFeatures:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
869 logging.error("MicroPITA.funcRun:: Did not receive both the Targeted feature file and the feature selection method. MicroPITA did not run.")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
870 return False
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
871
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
872 #Diversity metrics to run
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
873 #Use custom metrics if specified
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
874 #Custom beta metrics set to normalized only, custom alpha metrics set to count only
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
875 diversityMetricsAlpha = [] if strCustomAlpha or strAlphaMetadata else [MicroPITA.c_strInverseSimpsonDiversity]
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
876 diversityMetricsBeta = [] if istmBetaMatrix else [strCustomBeta] if strCustomBeta else [MicroPITA.c_strBrayCurtisDissimilarity]
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
877 # inverseDiversityMetricsBeta = [MicroPITA.c_strInvBrayCurtisDissimilarity]
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
878 diversityMetricsAlphaNoNormalize = [strAlphaMetadata] if strAlphaMetadata else [strCustomAlpha] if strCustomAlpha else []
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
879 diversityMetricsBetaNoNormalize = []
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
880 # inverseDiversityMetricsBetaNoNormalize = []
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
881
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
882 #Targeted taxa
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
883 userDefinedTaxa = []
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
884
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
885 #Perform different flows flags
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
886 c_RUN_MAX_DIVERSITY_1 = ConstantsMicropita.c_strDiversity in lstrMethods
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
887 c_RUN_REPRESENTIVE_DISSIMILARITY_2 = ConstantsMicropita.c_strRepresentative in lstrMethods
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
888 c_RUN_MAX_DISSIMILARITY_3 = ConstantsMicropita.c_strExtreme in lstrMethods
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
889 c_RUN_RANK_AVERAGE_USER_4 = False
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
890 if ConstantsMicropita.c_strFeature in lstrMethods:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
891 c_RUN_RANK_AVERAGE_USER_4 = True
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
892 if not istmFeatures:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
893 logging.error("MicroPITA.funcRun:: No taxa file was given for taxa selection.")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
894 return False
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
895 #Read in taxa list, break down to lines and filter out empty strings
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
896 userDefinedTaxa = filter(None,(s.strip( ) for s in istmFeatures.readlines()))
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
897 c_RUN_RANDOM_5 = ConstantsMicropita.c_strRandom in lstrMethods
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
898 c_RUN_DISTINCT = ConstantsMicropita.c_strDistinct in lstrMethods
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
899 c_RUN_DISCRIMINANT = ConstantsMicropita.c_strDiscriminant in lstrMethods
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
900
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
901 #Read in abundance data
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
902 #Abundance is a structured array. Samples (column) by Taxa (rows) with the taxa id row included as the column index=0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
903 #Abundance table object to read in and manage data
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
904 totalAbundanceTable = AbundanceTable.funcMakeFromFile(xInputFile=istmInput, lOccurenceFilter = [iMinSeqs, iMinSamples],
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
905 cDelimiter=cDelimiter, sMetadataID=strIDName, sLastMetadataRow=strLastRowMetadata,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
906 sLastMetadata=strLastMetadataName, cFeatureNameDelimiter=cFeatureNameDelimiter, xOutputFile=ostmCheckedFile)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
907 if not totalAbundanceTable:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
908 logging.error("MicroPITA.funcRun:: Could not read in the abundance table. Analysis was not performed."+
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
909 " This often occurs when the Last Metadata is not specified correctly."+
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
910 " Please check to make sure the Last Metadata selection is the row of the last metadata,"+
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
911 " all values after this selection should be microbial measurements and should be numeric.")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
912 return False
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
913
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
914 lsOriginalLabels = SVM.funcMakeLabels(totalAbundanceTable.funcGetMetadata(strLabel)) if strLabel else strLabel
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
915
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
916 dictTotalMetadata = totalAbundanceTable.funcGetMetadataCopy()
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
917 logging.debug("MicroPITA.funcRun:: Received metadata=" + str(dictTotalMetadata))
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
918 #If there is only 1 unique value for the labels, do not run the Supervised methods
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
919 if strLabel and ( len(set(dictTotalMetadata.get(strLabel,[]))) < 2 ):
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
920 logging.error("The label " + strLabel + " did not have 2 or more values. Labels found=" + str(dictTotalMetadata.get(strLabel,[])))
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
921 return False
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
922
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
923 #Run unsupervised methods###
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
924 #Stratify the data if need be and drop the old data
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
925 lStratifiedAbundanceTables = totalAbundanceTable.funcStratifyByMetadata(strStratify) if strStratify else [totalAbundanceTable]
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
926
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
927 #For each stratified abundance block or for the unstratified abundance
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
928 #Run the unsupervised blocks
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
929 fAppendSupFiles = False
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
930 for stratAbundanceTable in lStratifiedAbundanceTables:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
931 logging.info("MicroPITA.funcRun:: Running abundance block:"+stratAbundanceTable.funcGetName())
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
932
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
933 ###NOT SUMMED, NOT NORMALIZED
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
934 #Only perform if the data is not yet normalized
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
935 if not stratAbundanceTable.funcIsNormalized( ):
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
936 #Need to first work with unnormalized data
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
937 if c_RUN_MAX_DIVERSITY_1 or c_RUN_REPRESENTIVE_DISSIMILARITY_2 or c_RUN_MAX_DISSIMILARITY_3:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
938
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
939 self._funcRunNormalizeSensitiveMethods(abndData=stratAbundanceTable, iSampleSelectionCount=iCount,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
940 dictSelectedSamples=selectedSamples, lsAlphaMetrics=diversityMetricsAlphaNoNormalize,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
941 lsBetaMetrics=diversityMetricsBetaNoNormalize,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
942 lsInverseBetaMetrics=diversityMetricsBetaNoNormalize,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
943 fRunDiversity=c_RUN_MAX_DIVERSITY_1,fRunRepresentative=c_RUN_REPRESENTIVE_DISSIMILARITY_2,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
944 fRunExtreme=c_RUN_MAX_DISSIMILARITY_3, strAlphaMetadata=strAlphaMetadata,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
945 istrmTree=istrmTree, istrmEnvr=istrmEnvr, fInvertDiversity=fInvertDiversity)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
946
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
947
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
948 #Generate selection by the rank average of user defined taxa
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
949 #Expects (Taxa (row) by Samples (column))
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
950 #Expects a column 0 of taxa id that is skipped
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
951 #Returns [(sample name,average,rank)]
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
952 #SUMMED AND NORMALIZED
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
953 stratAbundanceTable.funcSumClades()
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
954 #Normalize data at this point
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
955 stratAbundanceTable.funcNormalize()
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
956 if c_RUN_RANK_AVERAGE_USER_4:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
957 selectedSamples[ConstantsMicropita.c_strFeature] = self.funcSelectTargetedTaxaSamples(abndMatrix=stratAbundanceTable,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
958 lsTargetedTaxa=userDefinedTaxa, iSampleSelectionCount=iCount, sMethod=strFeatureSelection)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
959 logging.info("MicroPITA.funcRun:: Selected Samples Rank")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
960 logging.info(selectedSamples)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
961
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
962 ###SUMMED AND NORMALIZED analysis block
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
963 #Diversity based metric will move reduce to terminal taxa as needed
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
964 if c_RUN_MAX_DIVERSITY_1 or c_RUN_REPRESENTIVE_DISSIMILARITY_2 or c_RUN_MAX_DISSIMILARITY_3:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
965
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
966 self._funcRunNormalizeSensitiveMethods(abndData=stratAbundanceTable, iSampleSelectionCount=iCount,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
967 dictSelectedSamples=selectedSamples, lsAlphaMetrics=diversityMetricsAlpha,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
968 lsBetaMetrics=diversityMetricsBeta,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
969 lsInverseBetaMetrics=diversityMetricsBeta,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
970 fRunDiversity=c_RUN_MAX_DIVERSITY_1,fRunRepresentative=c_RUN_REPRESENTIVE_DISSIMILARITY_2,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
971 fRunExtreme=c_RUN_MAX_DISSIMILARITY_3,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
972 istmBetaMatrix=istmBetaMatrix, istrmTree=istrmTree, istrmEnvr=istrmEnvr, fInvertDiversity=fInvertDiversity)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
973
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
974 #5::Select randomly
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
975 #Expects sampleNames = List of sample names [name, name, name...]
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
976 if(c_RUN_RANDOM_5):
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
977 #Select randomly from sample names
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
978 selectedSamples[ConstantsMicropita.c_strRandom] = self.funcGetRandomSamples(lsSamples=stratAbundanceTable.funcGetSampleNames(), iNumberOfSamplesToReturn=iCount)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
979 logging.info("MicroPITA.funcRun:: Selected Samples Random")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
980 logging.info(selectedSamples)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
981
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
982 #Perform supervised selection
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
983 if c_RUN_DISTINCT or c_RUN_DISCRIMINANT:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
984 if strLabel:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
985 dictSelectionRet = self.funcRunSupervisedDistancesFromCentroids(abundanceTable=stratAbundanceTable,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
986 fRunDistinct=c_RUN_DISTINCT, fRunDiscriminant=c_RUN_DISCRIMINANT,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
987 xOutputSupFile=ostmInputPredictFile,xPredictSupFile=ostmPredictFile,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
988 strSupervisedMetadata=strLabel, iSampleSupSelectionCount=iCount,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
989 lsOriginalSampleNames = totalAbundanceTable.funcGetSampleNames(),
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
990 lsOriginalLabels = lsOriginalLabels,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
991 fAppendFiles=fAppendSupFiles)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
992
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
993 [selectedSamples.setdefault(sKey,[]).extend(lValue) for sKey,lValue in dictSelectionRet.items()]
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
994
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
995 if not fAppendSupFiles:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
996 fAppendSupFiles = True
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
997 logging.info("MicroPITA.funcRun:: Selected Samples Unsupervised")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
998 logging.info(selectedSamples)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
999 return selectedSamples
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1000
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1001 #Testing: Happy path tested
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
@staticmethod
def funcWriteSelectionToFile(dictSelection,xOutputFilePath):
    """
    Writes the selection of samples by method to an output file.

    One delimited row is written per selection method: the method name followed by
    the samples selected by that method. Nothing is written (and no file is created)
    when the selection is empty.

    :param dictSelection: The dictionary of selections by method to be written to a file.
    :type: Dictionary The dictionary of selections by method {"method":["sample selected","sample selected"...]}
    :param xOutputFilePath: FileStream or String path to file in which the dictionary is written.
    :type: String FileStream or String path to file
    """

    if not dictSelection:
        return

    #Only a handle opened here (from a string path) is closed here;
    #a stream handed in by the caller is left open for the caller to manage.
    fOpenedHere = isinstance(xOutputFilePath, str)
    ostmOutput = open(xOutputFilePath,"w") if fOpenedHere else xOutputFilePath
    try:
        f = csv.writer(ostmOutput, delimiter=ConstantsMicropita.c_outputFileDelim )

        #Create output content from dictionary
        for sKey in dictSelection:
            f.writerow([sKey]+dictSelection[sKey])
            logging.debug("MicroPITA.funcRun:: Selected samples output to file:"+str(dictSelection[sKey]))
    finally:
        #Guarantee the rows are flushed and the descriptor released even if a write fails
        if fOpenedHere:
            ostmOutput.close()
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1023
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1024 #Testing: Happy Path tested
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1025 @staticmethod
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1026 def funcReadSelectionFileToDictionary(xInputFile):
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1027 """
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1028 Reads in an output selection file from micropita and formats it into a dictionary.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1029
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1030 :param xInputFile: String path to file or file stream to read and translate into a dictionary.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1031 {"method":["sample selected","sample selected"...]}
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1032 :type: FileStream or String Path to file
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1033 :return Dictionary: Samples selected by methods.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1034 Dictionary {"Selection Method":["SampleID","SampleID","SampleID",...]}
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1035 """
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1036
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1037 #Open file
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1038 istmReader = csv.reader(open(xInputFile,'r') if isinstance(xInputFile, str) else xInputFile, delimiter = ConstantsMicropita.c_outputFileDelim)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1039
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1040 #Dictionary to hold selection data
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1041 return dict([(lsLine[0], lsLine[1:]) for lsLine in istmReader])
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1042
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
#Set up arguments reader
#Mode used to open text input files. Python 2 needs the "U" flag to get universal newlines;
#Python 3 applies them by default and rejects "U" outright from version 3.11 onwards.
_strInputFileMode = "rU" if sys.version_info[0] < 3 else "r"

argp = argparse.ArgumentParser( prog = "MicroPITA.py",
    description = """Selects samples from abundance tables based on various selection schemes.""" )

#Sample count and the selection method(s) to run
args = argp.add_argument_group( "Common", "Commonly modified options" )
args.add_argument(ConstantsMicropita.c_strCountArgument,"--num", dest="iCount", metavar = "samples", default = 10, type = int, help = ConstantsMicropita.c_strCountHelp)
#Every -m occurrence appends one method name; _main substitutes a default method when the list stays empty
args.add_argument("-m","--method", dest = "lstrMethods", metavar = "method", default = [], help = ConstantsMicropita.c_strSelectionTechniquesHelp,
    choices = ConstantsMicropita.c_lsAllMethods, action = "append")

#Custom alpha/beta diversity metrics and the optional inputs that go with them
#These values are taken as plain strings (no type= conversion is applied here)
args = argp.add_argument_group( "Custom", "Selecting and inputing custom metrics" )
args.add_argument("-a","--alpha", dest = "strAlphaDiversity", metavar = "AlphaDiversity", default = None, help = ConstantsMicropita.c_strCustomAlphaDiversityHelp, choices = Metric.setAlphaDiversities)
args.add_argument("-b","--beta", dest = "strBetaDiversity", metavar = "BetaDiversity", default = None, help = ConstantsMicropita.c_strCustomBetaDiversityHelp, choices = list(Metric.setBetaDiversities)+[Metric.c_strUnifracUnweighted,Metric.c_strUnifracWeighted])
args.add_argument("-q","--alphameta", dest = "strAlphaMetadata", metavar = "AlphaDiversityMetadata", default = None, help = ConstantsMicropita.c_strCustomAlphaDiversityMetadataHelp)
args.add_argument("-x","--betamatrix", dest = "istmBetaMatrix", metavar = "BetaDiversityMatrix", default = None, help = ConstantsMicropita.c_strCustomBetaDiversityMatrixHelp)
args.add_argument("-o","--tree", dest = "istrmTree", metavar = "PhylogeneticTree", default = None, help = ConstantsMicropita.c_strCustomPhylogeneticTreeHelp)
args.add_argument("-i","--envr", dest = "istrmEnvr", metavar = "EnvironmentFile", default = None, help = ConstantsMicropita.c_strCustomEnvironmentFileHelp)
args.add_argument("-f","--invertDiversity", dest = "fInvertDiversity", action="store_true", default = False, help = ConstantsMicropita.c_strInvertDiversityHelp)

#Row/column identifiers and targeted feature selection
args = argp.add_argument_group( "Miscellaneous", "Row/column identifiers and feature targeting options" )
args.add_argument("-d",ConstantsMicropita.c_strIDNameArgument, dest="strIDName", metavar="sample_id", help= ConstantsMicropita.c_strIDNameHelp)
args.add_argument("-l",ConstantsMicropita.c_strLastMetadataNameArgument, dest="strLastMetadataName", metavar = "metadata_id", default = None,
    help= ConstantsMicropita.c_strLastMetadataNameHelp)
#The first entry of lsTargetedFeatureMethodValues doubles as the default targeting method
args.add_argument("-r",ConstantsMicropita.c_strTargetedFeatureMethodArgument, dest="strFeatureSelection", metavar="targeting_method", default=ConstantsMicropita.lsTargetedFeatureMethodValues[0],
    choices=ConstantsMicropita.lsTargetedFeatureMethodValues, help= ConstantsMicropita.c_strTargetedFeatureMethodHelp)
args.add_argument("-t",ConstantsMicropita.c_strTargetedSelectionFileArgument, dest="istmFeatures", metavar="feature_file", type=argparse.FileType(_strInputFileMode), help=ConstantsMicropita.c_strTargetedSelectionFileHelp)
args.add_argument("-w",ConstantsMicropita.c_strFeatureMetadataArgument, dest="strLastFeatureMetadata", metavar="Last_Feature_Metadata", default=None, help=ConstantsMicropita.c_strFeatureMetadataHelp)

#Metadata ids used for supervised labels and for stratifying unsupervised selection
args = argp.add_argument_group( "Data labeling", "Metadata IDs for strata and supervised label values" )
args.add_argument("-e",ConstantsMicropita.c_strSupervisedLabelArgument, dest="strLabel", metavar= "supervised_id", help=ConstantsMicropita.c_strSupervisedLabelHelp)
args.add_argument("-s",ConstantsMicropita.c_strUnsupervisedStratifyMetadataArgument, dest="strUnsupervisedStratify", metavar="stratify_id",
    help= ConstantsMicropita.c_strUnsupervisedStratifyMetadataHelp)

#Delimiters of the abundance table and of the feature (taxonomy) names
args = argp.add_argument_group( "File formatting", "Rarely modified file formatting options" )
args.add_argument("-j",ConstantsMicropita.c_strFileDelimiterArgument, dest="cFileDelimiter", metavar="column_delimiter", default="\t", help=ConstantsMicropita.c_strFileDelimiterHelp)
args.add_argument("-k",ConstantsMicropita.c_strFeatureNameDelimiterArgument, dest="cFeatureNameDelimiter", metavar="taxonomy_delimiter", default="|", help=ConstantsMicropita.c_strFeatureNameDelimiterHelp)

#Logging level plus optional output files; argparse opens each output file for writing while parsing
args = argp.add_argument_group( "Debugging", "Debugging options - modify at your own risk!" )
args.add_argument("-v",ConstantsMicropita.c_strLoggingArgument, dest="strLogLevel", metavar = "log_level", default="WARNING",
    choices=ConstantsMicropita.c_lsLoggingChoices, help= ConstantsMicropita.c_strLoggingHelp)
args.add_argument("-c",ConstantsMicropita.c_strCheckedAbundanceFileArgument, dest="ostmCheckedFile", metavar = "output_qc", type = argparse.FileType("w"), help = ConstantsMicropita.c_strCheckedAbundanceFileHelp)
args.add_argument("-g",ConstantsMicropita.c_strLoggingFileArgument, dest="ostmLoggingFile", metavar = "output_log", type = argparse.FileType("w"), help = ConstantsMicropita.c_strLoggingFileHelp)
args.add_argument("-u",ConstantsMicropita.c_strSupervisedInputFile, dest="ostmInputPredictFile", metavar = "output_scaled", type = argparse.FileType("w"), help = ConstantsMicropita.c_strSupervisedInputFileHelp)
args.add_argument("-p",ConstantsMicropita.c_strSupervisedPredictedFile, dest="ostmPredictFile", metavar = "output_labels", type = argparse.FileType("w"), help = ConstantsMicropita.c_strSupervisedPredictedFileHelp)

#Positional input abundance table and output selection file
#NOTE(review): argparse only consults "default" for a positional declared with nargs="?" (or "*");
#as declared both positionals are required, so the stdin/stdout defaults below are never used.
#Confirm whether optional positionals were intended before changing this.
argp.add_argument("istmInput", metavar = "input.pcl/biome", type = argparse.FileType(_strInputFileMode), help = ConstantsMicropita.c_strAbundanceFileHelp,
    default = sys.stdin)
argp.add_argument("ostmOutput", metavar = "output.txt", type = argparse.FileType("w"), help = ConstantsMicropita.c_strGenericOutputDataFileHelp,
    default = sys.stdout)

#Prepend the generated usage text (as an indented literal block) to the module docstring.
#__doc__ is None when docstrings are stripped (python -OO), hence the fallback to "".
__doc__ = "::\n\n\t" + argp.format_help( ).replace( "\n", "\n\t" ) + ( __doc__ or "" )
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1093
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
def _main( ):
    """
    Command line entry point.

    Parses the command line with the module-level parser, configures logging,
    runs the microPITA selection and writes the selected samples to the output stream.

    :return: -1 when the analysis yields no selection; otherwise nothing (None).
    """
    args = argp.parse_args( )

    #Set up logger
    #Messages go to the requested log file when one was given, otherwise to stderr.
    #(filemode is only honoured together with filename, so it is not passed with stream.)
    iLogLevel = getattr(logging, args.strLogLevel.upper(), None)
    logging.basicConfig(stream = args.ostmLoggingFile if args.ostmLoggingFile else sys.stderr, level=iLogLevel)

    #Run micropita
    logging.info("MicroPITA:: Start microPITA")
    microPITA = MicroPITA()

    #Argparse appends to a default list rather than replacing it, so the parser default is
    #left empty and the real default method is filled in here when no method was requested
    if not args.lstrMethods:
        args.lstrMethods = [ConstantsMicropita.c_strRepresentative]

    dictSelectedSamples = microPITA.funcRun(
        strIDName = args.strIDName,
        strLastMetadataName = args.strLastMetadataName,
        istmInput = args.istmInput,
        ostmInputPredictFile = args.ostmInputPredictFile,
        ostmPredictFile = args.ostmPredictFile,
        ostmCheckedFile = args.ostmCheckedFile,
        ostmOutput = args.ostmOutput,
        cDelimiter = args.cFileDelimiter,
        cFeatureNameDelimiter = args.cFeatureNameDelimiter,
        istmFeatures = args.istmFeatures,
        strFeatureSelection = args.strFeatureSelection,
        iCount = args.iCount,
        strLastRowMetadata = args.strLastFeatureMetadata,
        strLabel = args.strLabel,
        strStratify = args.strUnsupervisedStratify,
        strCustomAlpha = args.strAlphaDiversity,
        strCustomBeta = args.strBetaDiversity,
        strAlphaMetadata = args.strAlphaMetadata,
        istmBetaMatrix = args.istmBetaMatrix,
        istrmTree = args.istrmTree,
        istrmEnvr = args.istrmEnvr,
        lstrMethods = args.lstrMethods,
        fInvertDiversity = args.fInvertDiversity
    )

    #An empty or missing result is treated as a failed run
    if not dictSelectedSamples:
        logging.error("MicroPITA:: Error, did not get a result from analysis.")
        return -1
    logging.info("End microPITA")

    #Log output for debugging
    #Lazy %s formatting: the selection is only rendered when debug logging is enabled
    logging.debug("MicroPITA:: Returned the following samples:%s", dictSelectedSamples)

    #Write selection to file
    microPITA.funcWriteSelectionToFile(dictSelection=dictSelectedSamples, xOutputFilePath=args.ostmOutput)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
if __name__ == "__main__":
    #Hand the result of _main to sys.exit so a failed run (-1) gives a non-zero exit status;
    #a None result still exits with status 0
    sys.exit( _main( ) )