Mercurial > repos > george-weingart > micropita
annotate MicroPITA.py @ 32:041787cd0d31 draft default tip
Modified from StringIO import StringIO ## for Python 2 to from io import StringIO ## for Python 3
author | george-weingart |
---|---|
date | Wed, 23 Jun 2021 20:52:58 +0000 |
parents | 1d09ffab87a7 |
children |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python |
2 """ | |
3 Author: Timothy Tickle | |
4 Description: Class to Run analysis for the microPITA paper | |
5 """ | |
6 | |
7 ##################################################################################### | |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
8 # Copyright (C) <2012> |
0 | 9 # |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
10 # Permission is hereby granted, free of charge, to any person obtaining a copy of |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
11 # this software and associated documentation files (the "Software"), to deal in the |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
12 # Software without restriction, including without limitation the rights to use, copy, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
13 # modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
14 # and to permit persons to whom the Software is furnished to do so, subject to |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
15 # the following conditions: |
0 | 16 # |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
17 # The above copyright notice and this permission notice shall be included in all copies |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
18 # or substantial portions of the Software. |
0 | 19 # |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
20 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
21 # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
22 # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
23 # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
24 # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
25 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
0 | 26 ##################################################################################### |
27 | |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
28 from types import * |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
29 import scipy.spatial.distance |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
30 import scipy.cluster.hierarchy as hcluster |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
31 import random |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
32 import os |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
33 import operator |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
34 import numpy as np |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
35 import mlpy |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
36 import math |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
37 import logging |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
38 import csv |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
39 from src.ConstantsMicropita import ConstantsMicropita |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
40 from src.breadcrumbs.src.UtilityMath import UtilityMath |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
41 from src.breadcrumbs.src.SVM import SVM |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
42 from src.breadcrumbs.src.MLPYDistanceAdaptor import MLPYDistanceAdaptor |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
43 from src.breadcrumbs.src.KMedoids import Kmedoids |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
44 from src.breadcrumbs.src.Metric import Metric |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
45 from src.breadcrumbs.src.ConstantsBreadCrumbs import ConstantsBreadCrumbs |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
46 from src.breadcrumbs.src.AbundanceTable import AbundanceTable |
0 | 47 __author__ = "Timothy Tickle" |
48 __copyright__ = "Copyright 2012" | |
49 __credits__ = ["Timothy Tickle"] | |
50 __license__ = "MIT" | |
51 __maintainer__ = "Timothy Tickle" | |
52 __email__ = "ttickle@sph.harvard.edu" | |
53 __status__ = "Development" | |
54 | |
55 import sys | |
56 import argparse | |
16
7d25ecd225dd
Updated Micropita.py to suppres future warnings as this was causing a problem inn Galaxy
george.weingart@gmail.com
parents:
0
diff
changeset
|
57 import warnings |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
58 warnings.simplefilter(action="ignore", category=FutureWarning) |
0 | 59 |
60 | |
61 class MicroPITA: | |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
62 """ |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
63 Selects samples from a first tier of a multi-tiered study to be used in a second tier. |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
64 Different methods can be used for selection. |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
65 The expected input is an abundance table (and potentially a text file of targeted features, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
66 if using the targeted features option). Output is a list of samples exhibiting the |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
67 characteristics of interest. |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
68 """ |
0 | 69 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
70 # Constants |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
71 # Diversity metrics Alpha |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
72 c_strInverseSimpsonDiversity = Metric.c_strInvSimpsonDiversity |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
73 c_strChao1Diversity = Metric.c_strChao1Diversity |
0 | 74 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
75 # Diversity metrics Beta |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
76 c_strBrayCurtisDissimilarity = Metric.c_strBrayCurtisDissimilarity |
0 | 77 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
78 # Additive inverses of diversity metrics beta |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
79 c_strInvBrayCurtisDissimilarity = Metric.c_strInvBrayCurtisDissimilarity |
0 | 80 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
81 # Technique Names |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
82 ConstantsMicropita.c_strDiversity2 = ConstantsMicropita.c_strDiversity+"_C" |
0 | 83 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
84 # Targeted feature settings |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
85 c_strTargetedRanked = ConstantsMicropita.c_strTargetedRanked |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
86 c_strTargetedAbundance = ConstantsMicropita.c_strTargetedAbundance |
0 | 87 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
88 # Technique groupings |
0 | 89 # c_lsDiversityMethods = [ConstantsMicropita.c_strDiversity,ConstantsMicropita.c_strDiversity2] |
90 | |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
91 # Converts ecology metrics into standardized method selection names |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
92 dictConvertAMetricDiversity = {c_strInverseSimpsonDiversity: ConstantsMicropita.c_strDiversity, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
93 c_strChao1Diversity: ConstantsMicropita.c_strDiversity2} |
0 | 94 # dictConvertMicroPITAToAMetric = {ConstantsMicropita.c_strDiversity:c_strInverseSimpsonDiversity, ConstantsMicropita.c_strDiversity2:c_strChao1Diversity} |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
95 dictConvertBMetricToMethod = { |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
96 c_strBrayCurtisDissimilarity: ConstantsMicropita.c_strRepresentative} |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
97 dictConvertInvBMetricToMethod = { |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
98 c_strBrayCurtisDissimilarity: ConstantsMicropita.c_strExtreme} |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
99 |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
100 # Linkage used in the Hierarchical clustering |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
101 c_strHierarchicalClusterMethod = 'average' |
0 | 102 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
103 # Group 1## Diversity |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
104 # Testing: Happy path Testing (8) |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
def funcGetTopRankedSamples(self, lldMatrix=None, lsSampleNames=None, iTopAmount=None):
    """
    Given a list of lists of measurements, return the top-ranked entries of each list.

    Every inner list is ranked from its highest to its lowest value and the first
    iTopAmount ranked positions are kept. Equal values keep their original
    left-to-right order. If lsSampleNames is given (and not empty) the kept positions
    are translated into the names found at those positions; otherwise the integer
    positions themselves are returned.

    :param lldMatrix: List of lists [[value,value,value,value],[value,value,value,value]].
    :type: List of lists    List of measurements. Each list is a different measurement. Each measurement is positionally related to a sample.
    :param lsSampleNames: List of sample names positionally related (the same) to each list (Optional).
    :type: List of strings    List of strings.
    :param iTopAmount: The amount of top measured samples (assumes the higher measurements are better).
                       None keeps every position of each list (it is used directly as a slice bound).
    :type: integer    Integer amount of sample names/ indices to return.
    :return List: One list per measurement list, holding the selected sample names (or indices).
    """
    topRankListRet = []
    for rowMetrics in lldMatrix:
        # Rank the positions of this row by their measurement, best first.
        # sorted() is used instead of range(...).sort(): on Python 3 a range object
        # is immutable and has no sort() method. Like list.sort(), sorted() is stable,
        # so tied measurements are returned in their original order.
        liRankedIndices = sorted(range(len(rowMetrics)), key=rowMetrics.__getitem__, reverse=True)
        liTopIndices = liRankedIndices[:iTopAmount]

        if lsSampleNames:
            topRankListRet.append([lsSampleNames[iIndex] for iIndex in liTopIndices])
        else:
            topRankListRet.append(liTopIndices)

    return topRankListRet
0 | 132 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
133 # Group 2## Representative Dissimilarity |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
134 # Testing: Happy path tested 1 |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
def funcGetCentralSamplesByKMedoids(self, npaMatrix=None, sMetric=None, lsSampleNames=None, iNumberSamplesReturned=0, istmBetaMatrix=None, istrmTree=None, istrmEnvr=None):
    """
    Gets centroid samples by k-medoids clustering of a given matrix.

    The number of samples is taken from the first axis of npaMatrix
    (npaMatrix.shape[0]). Asking for more samples than exist logs an error and
    returns False; asking for exactly as many samples as exist returns all sample
    names without clustering.

    :param npaMatrix: Numpy array of abundance data.
                      NOTE(review): the code counts samples along the first axis (rows); an earlier
                      version of this docstring described rows as features - confirm against callers.
    :type: Numpy array    Abundance Data.
    :param sMetric: String name of beta metric used as the distance metric.
    :type: String    String name of beta metric.
    :param lsSampleNames: The names of the sample
    :type: List    List of strings
    :param iNumberSamplesReturned: Number of samples to return, each will be a centroid of a sample.
    :type: Integer    Number of samples to return
    :param istmBetaMatrix: File with beta-diversity matrix. When given, the distances are read from it
                           instead of being computed from npaMatrix.
    :type: File stream or file path string
    :param istrmTree: Passed through to Metric.funcGetBetaMetric when the distances are computed.
    :param istrmEnvr: Passed through to Metric.funcGetBetaMetric when the distances are computed.
    :return List: List of selected samples, or False if the selection could not be made.
    """

    # Count of how many samples (first axis of the matrix)
    sampleCount = npaMatrix.shape[0]
    if iNumberSamplesReturned > sampleCount:
        logging.error("MicroPITA.funcGetCentralSamplesByKMedoids:: There are not enough samples to return the amount of samples specified. Return sample count = " +
                      str(iNumberSamplesReturned)+". Sample number = "+str(sampleCount)+".")
        return False

    # If the cluster count is equal to the sample count return all samples
    if sampleCount == iNumberSamplesReturned:
        return list(lsSampleNames)

    # Get distance matrix: read it from the supplied file if there is one, otherwise compute it
    if istmBetaMatrix:
        distanceMatrix = scipy.spatial.distance.squareform(
            Metric.funcReadMatrixFile(istmMatrixFile=istmBetaMatrix, lsSampleOrder=lsSampleNames)[0])
    else:
        distanceMatrix = Metric.funcGetBetaMetric(
            npadAbundancies=npaMatrix, sMetric=sMetric, istrmTree=istrmTree, istrmEnvr=istrmEnvr, lsSampleOrder=lsSampleNames)

    # A plain boolean in place of a matrix is treated as failure.
    # isinstance(..., bool) replaces types.BooleanType, which does not exist on Python 3.
    if isinstance(distanceMatrix, bool):
        logging.error(
            "MicroPITA.funcGetCentralSamplesByKMedoids:: Could not read in the supplied distance matrix, returning false.")
        return False

    # Handle unifrac output (only the first element of the result is used as the distance matrix)
    if sMetric in [Metric.c_strUnifracUnweighted, Metric.c_strUnifracWeighted]:
        distanceMatrix = distanceMatrix[0]

    # Log distance matrix
    logging.debug(
        "MicroPITA.funcGetCentralSamplesByKMedoids:: Distance matrix for representative selection using metric="+str(sMetric))

    distance = MLPYDistanceAdaptor(
        npaDistanceMatrix=distanceMatrix, fIsCondensedMatrix=True)

    # Create object to determine clusters/medoids
    medoidsMaker = Kmedoids(k=iNumberSamplesReturned, dist=distance)
    # Per the original author's note, medoidsData includes (1d numpy array, medoids indexes;
    #                                                       1d numpy array, non-medoids indexes;
    #                                                       1d numpy array, cluster membership for non-medoids;
    #                                                       double, cost of configuration)
    # Build a matrix of single-element index lists (one per sample) to pass to the distance adaptor.
    # range() replaces xrange(), which does not exist on Python 3.
    lliIndicesMatrix = [[iIndexPosition]
                        for iIndexPosition in range(len(npaMatrix))]
    medoidsData = medoidsMaker.compute(np.array(lliIndicesMatrix))
    logging.debug(
        "MicroPITA.funcGetCentralSamplesByKMedoids:: Results from the kmedoid method in representative selection:")
    logging.debug(str(medoidsData))

    # Return the names of the medoid (centroid) samples
    selectedIndexes = medoidsData[0]
    return [lsSampleNames[selectedIndexes[index]] for index in range(iNumberSamplesReturned)]
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
201 |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
202 # Group 3## Highest Dissimilarity |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
203 # Testing: Happy path tested |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
def funcSelectExtremeSamplesFromHClust(self, strBetaMetric, npaAbundanceMatrix, lsSampleNames, iSelectSampleCount, istmBetaMatrix=None, istrmTree=None, istrmEnvr=None):
    """
    Select extreme samples from hierarchical clustering.

    A beta-diversity matrix is read from istmBetaMatrix when one is given,
    otherwise it is computed from npaAbundanceMatrix with Metric.funcGetBetaMetric.
    The matrix is clustered with hcluster.linkage and sample names are collected
    from the terminal nodes of the linkage rows, in row order, until
    iSelectSampleCount names have been gathered.

    :param strBetaMetric: The beta metric to use for distance matrix generation.
    :type: String The name of the beta metric to use.
    :param npaAbundanceMatrix: Numpy array where row=samples and columns=features.
    :type: Numpy Array Abundance data.
    :param lsSampleNames: The names of the samples.
    :type: List List of strings.
    :param iSelectSampleCount: Number of samples to select (return).
    :type: Integer Integer number of samples returned.
    :param istmBetaMatrix: File with beta-diversity matrix
    :type: File stream or file path string
    :param istrmTree: Forwarded unchanged to Metric.funcGetBetaMetric.
    :param istrmEnvr: Forwarded unchanged to Metric.funcGetBetaMetric.
    :return Samples: List of selected sample names.
                     lsSampleNames itself is returned when iSelectSampleCount equals the number of rows in npaAbundanceMatrix.
                     An empty list is returned when iSelectSampleCount is zero or negative.
                     False is returned when the distance matrix could not be obtained.
    """

    # If they want all the sample count, return all sample names
    iSampleCount = len(npaAbundanceMatrix[:, 0])
    if iSelectSampleCount == iSampleCount:
        return lsSampleNames

    # A non-positive request can never be met by the equality test used while
    # collecting samples below, which would otherwise hand back every sample.
    if iSelectSampleCount <= 0:
        return []

    # Generate beta matrix (condensed form)
    if istmBetaMatrix:
        xReadResult = Metric.funcReadMatrixFile(
            istmMatrixFile=istmBetaMatrix, lsSampleOrder=lsSampleNames)
        # NOTE(review): assumes a failed read is signalled by a boolean result - confirm against
        # Metric.funcReadMatrixFile. Checking before indexing avoids a TypeError on such a result.
        if isinstance(xReadResult, bool):
            tempDistanceMatrix = xReadResult
        else:
            tempDistanceMatrix = scipy.spatial.distance.squareform(xReadResult[0])
    else:
        tempDistanceMatrix = Metric.funcGetBetaMetric(
            npadAbundancies=npaAbundanceMatrix, sMetric=strBetaMetric, istrmTree=istrmTree, istrmEnvr=istrmEnvr, lsSampleOrder=lsSampleNames, fAdditiveInverse=True)

    # For the unifrac metrics the matrix is the first element of the result.
    # NOTE(review): as in the original, this unwrapping is also applied to a matrix read from file - confirm that is intended.
    if not isinstance(tempDistanceMatrix, bool) and strBetaMetric in [Metric.c_strUnifracUnweighted, Metric.c_strUnifracWeighted]:
        tempDistanceMatrix = tempDistanceMatrix[0]

    # isinstance(..., bool) replaces the Python 2 only types.BooleanType comparison
    if isinstance(tempDistanceMatrix, bool):
        logging.error(
            "MicroPITA.funcSelectExtremeSamplesFromHClust:: Could not read in the supplied distance matrix, returning false.")
        return False

    # A matrix read from file is inverted here; the computed matrix was requested with fAdditiveInverse=True instead
    if istmBetaMatrix:
        tempDistanceMatrix = 1-tempDistanceMatrix

    # Feed beta matrix to linkage to cluster
    # Send condensed matrix
    linkageMatrix = hcluster.linkage(
        tempDistanceMatrix, method=self.c_strHierarchicalClusterMethod)

    # Extract cluster information from dendrogram
    # The linkage matrix is of the form
    # [[int1 int2 double int3],...]
    # int1 and int2 are the paired samples indexed at 0 and up.
    # each list is an entry for a branch that is numbered starting with the first
    # list being sample count index + 1
    # each list is then named by an increment as they appear
    # this means that if a number is in the list and is = sample count or greater it is not
    # terminal and is instead a branch.
    # This method just takes the lowest metric measurement (highest distance pairs/clusters)
    # Works much better than the original technique

    # Holds the samples to be returned
    lsReturnSamplesRet = []
    for row in linkageMatrix:
        # Add each node of the pair one at a time, stopping as soon as enough samples are selected.
        for iNode in (int(row[0]), int(row[1])):
            # Only terminal nodes (index below the sample count) are samples;
            # higher indices are branches of the dendrogram.
            if iNode < iSampleCount:
                lsReturnSamplesRet.append(lsSampleNames[iNode])
                if len(lsReturnSamplesRet) == iSelectSampleCount:
                    return lsReturnSamplesRet

    # Return selected samples
    return lsReturnSamplesRet
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
284 |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
285 # Group 4## Rank Average of user Defined Taxa |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
286 # Testing: Happy Path Tested |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
def funcGetAverageAbundanceSamples(self, abndTable, lsTargetedFeature, fRank=False):
    """
    Averages feature abundance or ranked abundance per sample and returns the samples sorted.

    :param abndTable: Abundance Table to analyse
    :type: AbundanceTable Abundance Table
    :param lsTargetedFeature: String names
    :type: list list of string names of features (bugs) which are measured after ranking against the full sample
    :param fRank: Indicates to rank the abundance before getting the average abundance of the features (default false)
    :type: boolean Flag indicating ranking abundance before calculating average feature measurement (false= no ranking)
    :return List of lists or boolean: List of lists or False on error. One internal list per sample.
                                      For not Ranked [[sample, -1, average abundance of selected feature]]
                                      sorted by descending average abundance.
                                      For Ranked [[sample, average ranked abundance, average abundance of selected feature]]
                                      sorted by ascending average rank; when ConstantsMicropita.c_fBreakRankTiesByDiversity
                                      is set, equal ranks are ordered by ascending average abundance.
                                      Error Returns false
    """

    llAbundance = abndTable.funcGetAverageAbundancePerSample(
        lsTargetedFeature)
    if not llAbundance:
        logging.error("MicroPITA.funcGetAverageAbundanceSamples:: Could not get average abundance, returned false. Make sure the features (bugs) are spelled correctly and in the abundance table.")
        return False

    # Add a space for ranking if needed
    # Not ranked will be [[sSample, -1, average abundance]]
    # (the constant -1 can not discriminate ties if used in later functions, so this generalizes)
    # Ranked will be [[sSample, average rank, average abundance]]
    llRetAbundance = [[llist[0], -1, llist[1]] for llist in llAbundance]

    # Not ranked: highest average abundance first
    if not fRank:
        return sorted(llRetAbundance, key=lambda sampleData: sampleData[2], reverse=True)

    abndRanked = abndTable.funcRankAbundance()
    # Identity check so that a table type defining its own equality can not be mistaken for None
    if abndRanked is None:
        logging.error(
            "MicroPITA.funcGetAverageAbundanceSamples:: Could not rank the abundance table, returned false.")
        return False

    llRetRank = abndRanked.funcGetAverageAbundancePerSample(
        lsTargetedFeature)
    if not llRetRank:
        logging.error(
            "MicroPITA.funcGetAverageAbundanceSamples:: Could not get average ranked abundance, returned false. Make sure the features (bugs) are spelled correctly and in the abundance table.")
        return False

    # Replace the placeholder with the average rank of each sample
    dictRanks = dict(llRetRank)
    llRetAbundance = [[a[0], dictRanks[a[0]], a[2]]
                      for a in llRetAbundance]

    # Sort first for ties and then for the main feature (sorted is stable, so the
    # first ordering survives among samples with equal rank)
    if ConstantsMicropita.c_fBreakRankTiesByDiversity:
        llRetAbundance = sorted(
            llRetAbundance, key=lambda sampleData: sampleData[2])
    return sorted(llRetAbundance, key=lambda sampleData: sampleData[1])
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
339 |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
340 # Testing: Happy Path Tested |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
def funcSelectTargetedTaxaSamples(self, abndMatrix, lsTargetedTaxa, iSampleSelectionCount, sMethod=ConstantsMicropita.lsTargetedFeatureMethodValues[0]):
    """
    Selects the first iSampleSelectionCount samples from the ordering produced by
    funcGetAverageAbundanceSamples for the targeted features.
    Ranking is used when sMethod matches self.c_strTargetedRanked (case-insensitive),
    otherwise the average abundance is used.

    :param abndMatrix: Abundance table to analyse
    :type: AbundanceTable Abundance table
    :param lsTargetedTaxa: List of features
    :type: list list of strings
    :param iSampleSelectionCount: Number of samples to select
    :type: integer integer
    :param sMethod: Method to select targeted features
    :type: string String (Can be values found in ConstantsMicropita.lsTargetedFeatureMethodValues)
    :return List of strings: List of sample names which were selected
                             Empty list is returned on an error.
    """

    # Check data (a missing list is treated the same as an empty one)
    if lsTargetedTaxa is None or len(lsTargetedTaxa) < 1:
        logging.error(
            "MicroPITA.funcSelectTargetedTaxaSamples. Taxa defined selection was requested but no features were given.")
        return []

    fRank = sMethod.lower() == self.c_strTargetedRanked.lower()
    lsTargetedSamples = self.funcGetAverageAbundanceSamples(abndTable=abndMatrix, lsTargetedFeature=lsTargetedTaxa,
                                                            fRank=fRank)
    # The helper signals an error by returning False
    if lsTargetedSamples is False:
        logging.error("MicroPITA.funcSelectTargetedTaxaSamples:: Was not able to select for the features given. So targeted feature selection was not performed. Check to make sure the features are spelled correctly and exist in the abundance file.")
        return []

    # Select from results (first element of every entry is the sample name)
    return [sSample[0] for sSample in lsTargetedSamples[:iSampleSelectionCount]]
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
373 |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
374 # Group 5## Random |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
375 # Testing: Happy path Tested |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
def funcGetRandomSamples(self, lsSamples=None, iNumberOfSamplesToReturn=0):
    """
    Returns random sample names of the number given. No replacement.
    The selected names keep the relative order they have in lsSamples.

    :param lsSamples: List of sample names
    :type: list list of strings
    :param iNumberOfSamplesToReturn: Number of samples to select
    :type: integer integer.
    :return List: List of selected samples (strings).
                  lsSamples itself is returned when iNumberOfSamplesToReturn is at least its length.
                  An empty list is returned when lsSamples is None.
    """

    # The default of None means there is nothing to select from
    if lsSamples is None:
        return []

    # Input matrix sample count
    sampleCount = len(lsSamples)

    # Return the full list if they ask for at least as many samples as exist
    if iNumberOfSamplesToReturn >= sampleCount:
        return lsSamples

    # Get the random indices for the sample (without replacement);
    # kept in a set so the membership test below is constant time
    setRandomIndices = set(random.sample(
        range(sampleCount), iNumberOfSamplesToReturn))

    # Keep the samples whose index was drawn
    return [sSample for iIndex, sSample in enumerate(lsSamples) if iIndex in setRandomIndices]
0 | 400 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
401 # Happy path tested (case 3) |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
def funcGetAveragePopulation(self, abndTable, lfCompress):
    """
    Average each row of the abundance data over the entries kept by a mask.

    For every row returned by abndTable.funcGetAbundanceCopy() the first element
    is dropped, the remaining entries are filtered with lfCompress and the
    entries that are kept are averaged.

    :param abndTable: AbundanceTable of data to be averaged
    :type: AbundanceTable
    :param lfCompress: List of boolean flags (false means to remove the entry before averaging)
    :type: List of booleans
    :return List of doubles: One average per row; an empty list when lfCompress sums to 0.
    """
    # Nothing is kept by the mask, so there is nothing to average
    if sum(lfCompress) == 0:
        return []

    def _funcAverageKept(tplRow):
        # Skip the leading element, then keep only the flagged entries
        ldKept = np.compress(lfCompress, list(tplRow)[1:], axis=0)
        return sum(ldKept)/float(len(ldKept))

    return [_funcAverageKept(tplRow) for tplRow in abndTable.funcGetAbundanceCopy()]
0 | 423 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
424 # Happy path tested (2 cases) |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
def funcGetDistanceFromAverage(self, abndTable, ldAverage, lsSamples, lfSelected):
    """
    Measure how far each selected sample is from an average sample, using
    Bray-Curtis dissimilarity.

    Only samples named in lsSamples whose matching entry in lfSelected is true are measured.
    lfSelected is read by position, so it must line up with lsSamples
    (use abundancetable.funcGetSampleNames() to see the table order if needed).

    :param abndTable: Abundance table holding the data to be analyzed.
    :type: AbundanceTable
    :param ldAverage: Average population (average features of the abundance table of samples)
    :type: List of doubles which represent the average population
    :param lsSamples: These are the only samples used in the analysis
    :type: List of strings (sample ids)
    :param lfSelected: Samples to be included in the analysis
    :type: List of boolean (true means include)
    :return: List of distances (doubles), one per selected sample in lsSamples order
    """
    # Resolve the selection first so a bad flag list fails before any distance is computed
    lsChosenSamples = [sName for iPosition, sName in enumerate(lsSamples) if lfSelected[iPosition]]

    # Each distance comes from a two-row matrix: the sample's measurements and the average
    return [Metric.funcGetBrayCurtisDissimilarity(
                np.array([abndTable.funcGetSample(sName), ldAverage]))[0]
            for sName in lsChosenSamples]
0 | 451 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
452 # Happy path tested (1 case) |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
def funcMeasureDistanceFromLabelToAverageOtherLabel(self, abndTable, lfGroupOfInterest, lfGroupOther):
    """
    Get the distance of each sample in the group of interest from the average sample of the other group.
    Note: This assumes 2 classes.

    :param abndTable: Table of data to work out of.
    :type: AbundanceTable
    :param lfGroupOfInterest: Boolean indicator of the sample being in the group of interest.
    :type: List of booleans, true indicating an individual in the group of interest.
    :param lfGroupOther: Boolean indicator of the sample being in the other group.
    :type: List of booleans, true indicating an individual in the other group.
    :return List of tuples: (string sample name, double distance) for every sample in the group
                            of interest, in the order given by abndTable.funcGetSampleNames()
    """
    # Get all sample names
    lsAllSamples = abndTable.funcGetSampleNames()

    # Average (centroid) of the other group
    lAverageOther = self.funcGetAveragePopulation(
        abndTable=abndTable, lfCompress=lfGroupOther)

    # Distance of every sample of interest from that centroid
    ldSelectedDistances = self.funcGetDistanceFromAverage(abndTable=abndTable, ldAverage=lAverageOther,
                                                          lsSamples=lsAllSamples, lfSelected=lfGroupOfInterest)

    lsSamplesOfInterest = [lsAllSamples[iindex]
                           for iindex, fGroup in enumerate(lfGroupOfInterest) if fGroup]

    # Materialize the pairs: under Python 3 a bare zip() is a one-shot iterator,
    # not the list of tuples this method is documented to return
    return list(zip(lsSamplesOfInterest, ldSelectedDistances))
0 | 478 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
479 # Happy path tested (1 test case) |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
def funcPerformDistanceSelection(self, abndTable, iSelectionCount, sLabel, sValueOfInterest):
    """
    Rank the samples of one metadata group by their distance from the centroids of the other groups.

    Samples whose sLabel metadata equals sValueOfInterest form the group of interest. For every other
    value in that metadata row, each sample of interest is measured from the average (centroid) of that
    other group, and the per-sample distances are averaged over the other groups.
    An iSelectionCount of samples is selected from the group of interest closest to and furthest from
    the other centroids.

    :params abndTable: Abundance of measurements
    :type: AbundanceTable
    :params iSelectionCount: The number of samples selected at each extreme (0 selects nothing).
    :type: Integer greater than or equal to 0
    :params sLabel: ID of the metadata which is the supervised label
    :type: String
    :params sValueOfInterest: Metadata value in the sLabel metadata row of the abundance table which defines the group of interest.
    :type: String found in the abundance table metadata row indicated by sLabel.
    :return list of lists of tuples (samplename, distance): [[iSelectionCount of tuples closest to the other centroids],
            [iSelectionCount of tuples farthest from the other centroids], [all tuples of samples not selected]]
    """
    lsMetadata = abndTable.funcGetMetadata(sLabel)

    # Other metadata values. The value of interest is wrapped in a one-element set on
    # purpose: set(sValueOfInterest) would split the string into single characters and
    # so would never exclude a multi-character label from the "other" groups.
    lsUniqueOtherValues = list(set(lsMetadata) - {sValueOfInterest})

    # Get boolean indicator of values of interest
    lfLabelsInterested = [sValueOfInterest == sValue for sValue in lsMetadata]

    # Sum, per sample of interest, its distance from the centroid of each other group
    dictDistanceAverages = {}
    for sOtherLabel in lsUniqueOtherValues:
        # Get boolean indicator of labels not of interest
        lfLabelsOther = [sOtherLabel == sValue for sValue in lsMetadata]

        ldValueDistances = dict(self.funcMeasureDistanceFromLabelToAverageOtherLabel(
            abndTable, lfLabelsInterested, lfLabelsOther))

        for sKey, dDistance in ldValueDistances.items():
            if sKey in dictDistanceAverages:
                dictDistanceAverages[sKey] = dDistance + dictDistanceAverages[sKey]
            else:
                dictDistanceAverages[sKey] = dDistance

    # Finish the average by dividing by the number of other groups, then sort
    # ascending by distance so the extremes sit at the two ends of the list
    dOtherGroupCount = float(len(lsUniqueOtherValues))
    ltpleAverageDistances = sorted(
        [(sKey, dSum / dOtherGroupCount) for sKey, dSum in dictDistanceAverages.items()],
        key=operator.itemgetter(1))

    # Get the closest and farthest distances.
    # A count of 0 must select nothing; the slice [-0:] would return every sample.
    ltupleDiscriminantSamples = ltpleAverageDistances[:iSelectionCount]
    ltupleDistinctSamples = ltpleAverageDistances[iSelectionCount * -1:] if iSelectionCount else []

    # Names already selected, so they can be left out of the remaining population (better visualization)
    setSelectedNames = {tpleSelected[0]
                        for tpleSelected in ltupleDiscriminantSamples + ltupleDistinctSamples}

    # Return discriminant tuples, distinct tuples, other tuples
    return [ltupleDiscriminantSamples, ltupleDistinctSamples,
            [tplData for tplData in ltpleAverageDistances if tplData[0] not in setSelectedNames]]
0 | 537 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
538 # Run the supervised method surrounding distance from centroids |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
539 # Happy path tested (3 test cases) |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
def funcRunSupervisedDistancesFromCentroids(self, abundanceTable, fRunDistinct, fRunDiscriminant,
                                            xOutputSupFile, xPredictSupFile, strSupervisedMetadata,
                                            iSampleSupSelectionCount, lsOriginalSampleNames, lsOriginalLabels, fAppendFiles=False):
    """
    Runs supervised methods based on measuring distances of one label from the centroid of another.
    Every distinct value of the supervised metadata is treated as a group of its own
    (NAs are evaluated as their own group).

    :param abundanceTable: Data to analyze
    :type: AbundanceTable
    :param fRunDistinct: Run distinct selection method
    :type: Boolean (true runs method)
    :param fRunDiscriminant: Run discriminant method
    :type: Boolean (true runs method)
    :param xOutputSupFile: File output from supervised methods detailing data going into the method.
    :type: String or FileStream
    :param xPredictSupFile: File output from supervised methods holding the distance results.
    :type: String or FileStream
    :param strSupervisedMetadata: The metadata that will be used to group samples.
    :type: String
    :param iSampleSupSelectionCount: Number of samples to select
    :type: Integer sample selection count
    :param lsOriginalSampleNames: List of the sample names, order is important and should be preserved from the abundanceTable.
    :type: List of samples
    :param lsOriginalLabels: Labels forwarded unchanged to the SVM file writers.
    :type: List of labels
    :param fAppendFiles: Indicates that output files already exist and appending is occurring.
    :type: Boolean
    :return Selected Samples: A dictionary of selected samples by selection ID
    Dictionary {"Selection Method":["SampleID","SampleID"...]}
    """
    # Run each label value, one at a time, against all of the other label values.
    # Every entry holds [closest tuples, farthest tuples, remaining tuples].
    dictSelectionsByValue = {}
    for sValue in set(abundanceTable.funcGetMetadata(strSupervisedMetadata)):
        dictSelectionsByValue[sValue] = list(self.funcPerformDistanceSelection(
            abndTable=abundanceTable, iSelectionCount=iSampleSupSelectionCount,
            sLabel=strSupervisedMetadata, sValueOfInterest=sValue))

    # Make expected output files for supervised methods
    # 1. Output file which is similar to an input file for SVMs
    # 2. Output file that is similar to the probabilistic output of a SVM (LibSVM)
    # Mainly for making output of supervised methods (Distance from Centroid) similar
    # MicropitaVis needs some of these files
    if xOutputSupFile:
        if fAppendFiles:
            SVM.funcUpdateSVMFileWithAbundanceTable(abndAbundanceTable=abundanceTable, xOutputSVMFile=xOutputSupFile,
                                                    lsOriginalLabels=lsOriginalLabels, lsSampleOrdering=lsOriginalSampleNames)
        else:
            SVM.funcConvertAbundanceTableToSVMFile(abndAbundanceTable=abundanceTable, xOutputSVMFile=xOutputSupFile,
                                                   sMetadataLabel=strSupervisedMetadata, lsOriginalLabels=lsOriginalLabels,
                                                   lsSampleOrdering=lsOriginalSampleNames)

    # Collect the selected sample names. Either or both methods may be active,
    # so each one accumulates into its own entry of the returned dictionary.
    dictSelectedSamplesRet = {}
    for lltpleSelection in dictSelectionsByValue.values():
        if fRunDistinct:
            # Index 1 holds the samples farthest from the other centroids
            lsDistinct = dictSelectedSamplesRet.setdefault(ConstantsMicropita.c_strDistinct, [])
            lsDistinct.extend([tpleSample[0] for tpleSample in lltpleSelection[1]])
        if fRunDiscriminant:
            # Index 0 holds the samples closest to the other centroids
            lsDiscriminant = dictSelectedSamplesRet.setdefault(ConstantsMicropita.c_strDiscriminant, [])
            lsDiscriminant.extend([tpleSample[0] for tpleSample in lltpleSelection[0]])

    if xPredictSupFile:
        # Flatten the three lists of each label value into one list of (sample, distance) tuples
        dictFlattenedDistances = {}
        for sValue, lltpleSelection in dictSelectionsByValue.items():
            for ltpleGroup in lltpleSelection:
                for tpleSample in ltpleGroup:
                    dictFlattenedDistances.setdefault(sValue, []).append(tpleSample)

        # Append to an existing predict file or write a fresh one
        funcWritePredictions = self._updatePredictFile if fAppendFiles else self._writeToPredictFile
        funcWritePredictions(xPredictSupFile=xPredictSupFile, xInputLabelsFile=xOutputSupFile,
                             dictltpleDistanceMeasurements=dictFlattenedDistances, abundanceTable=abundanceTable,
                             lsOriginalSampleNames=lsOriginalSampleNames)
    return dictSelectedSamplesRet
0 | 612 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
613 # Two happy path test cases |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
def _updatePredictFile(self, xPredictSupFile, xInputLabelsFile, dictltpleDistanceMeasurements, abundanceTable, lsOriginalSampleNames):
    """
    Manages updating the predict file.

    Reads the distances already stored in the predict file, merges them with
    the distances passed in and rewrites the file with _writeToPredictFile.
    For a label group present in both, the passed-in tuples are kept and only
    the stored samples that are not among them are appended. Label groups
    present on one side only are kept in full.

    :param xPredictSupFile: File that has predictions (distances) from the supervised method.
                            A stream is closed and the file is reopened by its name.
    :type: FileStream or String file path
    :param xInputLabelsFile: Labels file given as input to the supervised methods (passed on to the writer).
    :type: FileStream or String file path
    :param dictltpleDistanceMeasurements: New distance measurements to merge into the file.
    :type: Dictionary of lists of tuples {"labelgroup":[("SampleName",dDistance)...], "labelgroup":[("SampleName",dDistance)...]}
    :param abundanceTable: Passed through unchanged to _writeToPredictFile.
    :type: AbundanceTable
    :param lsOriginalSampleNames: Sample names in the row order of the predict file;
                                  data row i of the file is attributed to lsOriginalSampleNames[i].
    :type: List of strings
    """

    # The file is read and later rewritten by path, so a stream is reduced to its name.
    if not isinstance(xPredictSupFile, str):
        xPredictSupFile.close()
        xPredictSupFile = xPredictSupFile.name

    # Read the stored data; the handle is released before the writer reopens the same path.
    with open(xPredictSupFile, 'r') as csvr:
        f = csv.reader(
            csvr, delimiter=ConstantsBreadCrumbs.c_strBreadCrumbsSVMSpace)
        # next() works on Python 2 and 3 readers; an empty file simply yields no header.
        lsHeader = next(f, [])[1:]
        dictlltpleRead = dict((sHeader, []) for sHeader in lsHeader)

        # First column of every row is the label and is not needed here.
        for iSampleIndex, sRow in enumerate(f):
            for iDistanceIndex, dDistance in enumerate(sRow[1:]):
                if dDistance != ConstantsMicropita.c_sEmptyPredictFileValue:
                    dictlltpleRead[lsHeader[iDistanceIndex]].append(
                        (lsOriginalSampleNames[iSampleIndex], dDistance))

    # Combine dictltpleDistanceMeasurements with the data read from the file
    # If they share a key then merge keeping parameter data
    # If they do not share the key, keep the full data
    dictNew = {}
    for sKey, ltpleGiven in dictltpleDistanceMeasurements.items():
        if sKey in dictlltpleRead:
            setGivenSamples = set(tple[0] for tple in ltpleGiven)
            dictNew[sKey] = ltpleGiven + [tple for tple in dictlltpleRead[sKey]
                                          if tple[0] not in setGivenSamples]
        else:
            dictNew[sKey] = ltpleGiven
    for sKey, ltpleStored in dictlltpleRead.items():
        if sKey not in dictltpleDistanceMeasurements:
            dictNew[sKey] = ltpleStored

    # Call writer
    self._writeToPredictFile(xPredictSupFile=xPredictSupFile, xInputLabelsFile=xInputLabelsFile,
                             dictltpleDistanceMeasurements=dictNew, abundanceTable=abundanceTable,
                             lsOriginalSampleNames=lsOriginalSampleNames, fFromUpdate=True)
0 | 661 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
662 # 2 happy path test cases |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
def _writeToPredictFile(self, xPredictSupFile, xInputLabelsFile, dictltpleDistanceMeasurements, abundanceTable, lsOriginalSampleNames, fFromUpdate=False):
    """
    Write to the predict file.

    The file gets a header row ("labels" followed by one column per key of
    dictltpleDistanceMeasurements) and one row per entry of
    lsOriginalSampleNames holding the sample's label and its distance for
    every key. Missing labels or distances are written as
    ConstantsMicropita.c_sEmptyPredictFileValue.

    :param xPredictSupFile: File that has predictions (distances) from the supervised method.
                            A path is opened for writing and closed here; a stream is written to and left open.
    :type: FileStream or String file path
    :param xInputLabelsFile: Labels file given as input to the supervised methods; read with SVM.funcReadLabelsFromFile.
    :type: FileStream or String file path
    :param dictltpleDistanceMeasurements: Distances to write. It is not modified.
    :type: Dictionary of lists of tuples {"labelgroup":[("SampleName",dDistance)...], "labelgroup":[("SampleName",dDistance)...]}
    :param abundanceTable: An abundance table of the sample data.
    :type: AbundanceTable
    :param lsOriginalSampleNames: Used if the file is being updated as the sample names so that it may be passed in and consistent with other writing.
                                  Otherwise will use the sample names from the abundance table.
    :type: List of strings
    :param fFromUpdate: Indicates if this is part of an update to the file or not.
    :type: Boolean
    """

    xInputLabelsFileName = xInputLabelsFile if isinstance(
        xInputLabelsFile, str) else xInputLabelsFile.name

    # Read the labels before touching the output so a failure here does not leave a truncated predict file.
    lsLabelSampleNames = lsOriginalSampleNames if fFromUpdate else abundanceTable.funcGetSampleNames()
    dictSamplesByLabel = SVM.funcReadLabelsFromFile(xSVMFile=xInputLabelsFileName, lsAllSampleNames=lsLabelSampleNames,
                                                    isPredictFile=False)
    dictLabels = dict((sSample, sLabel) for sLabel, lsSamples in dictSamplesByLabel.items()
                      for sSample in lsSamples)

    # Dictionary keys will be used to order the predict file.
    # A real list is needed: on Python 3 keys() is a view and cannot be added to a list.
    lsMeasurementKeys = list(dictltpleDistanceMeasurements.keys())

    # Reformat to {key: {sample: distance}} for lookups, without altering the caller's dictionary
    dictDistanceBySample = dict((sKey, dict(dictltpleDistanceMeasurements[sKey]))
                                for sKey in lsMeasurementKeys)

    sEmpty = ConstantsMicropita.c_sEmptyPredictFileValue
    fOpenedHere = isinstance(xPredictSupFile, str)
    hndlOut = open(xPredictSupFile, "w") if fOpenedHere else xPredictSupFile
    try:
        f = csv.writer(
            hndlOut, delimiter=ConstantsBreadCrumbs.c_strBreadCrumbsSVMSpace)

        # Make header
        f.writerow(["labels"] + lsMeasurementKeys)

        # Make body of file
        for sSample in lsOriginalSampleNames:
            f.writerow([dictLabels.get(sSample, sEmpty)] +
                       [str(dictDistanceBySample[sKey].get(sSample, sEmpty))
                        for sKey in lsMeasurementKeys])
    finally:
        # Only close what was opened here so the rows are flushed; a caller's stream stays under its control.
        if fOpenedHere:
            hndlOut.close()
0 | 709 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
710 def _funcRunNormalizeSensitiveMethods(self, abndData, iSampleSelectionCount, dictSelectedSamples, lsAlphaMetrics, lsBetaMetrics, lsInverseBetaMetrics, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
711 fRunDiversity, fRunRepresentative, fRunExtreme, strAlphaMetadata=None, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
712 istmBetaMatrix=None, istrmTree=None, istrmEnvr=None, fInvertDiversity=False): |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
713 """ |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
714 Manages running methods that are sensitive to normalization. This is called twice, once for the set of methods which should not be normalized and the other |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
715 for the set that should be normalized. |
0 | 716 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
717 :param abndData: Abundance table object holding the samples to be measured. |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
718 :type: AbundanceTable |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
719 :param iSampleSelectionCount The number of samples to select per method. |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
720 :type: Integer |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
721 :param dictSelectedSamples Will be added to as samples are selected {"Method:["strSelectedSampleID","strSelectedSampleID"...]}. |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
722 :type: Dictionary |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
723 :param lsAlphaMetrics: List of alpha metrics to use on alpha metric dependent assays (like highest diversity). |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
724 :type: List of strings |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
725 :param lsBetaMetrics: List of beta metrics to use on beta metric dependent assays (like most representative). |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
726 :type: List of strings |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
727 :param lsInverseBetaMetrics: List of inverse beta metrics to use on inverse beta metric dependent assays (like most dissimilar). |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
728 :type: List of strings |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
729 :param fRunDiversity: Run Diversity based methods (true indicates run). |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
730 :type: Boolean |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
731 :param fRunRepresentative: Run Representative based methods (true indicates run). |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
732 :type: Boolean |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
733 :param fRunExtreme: Run Extreme based methods (true indicates run). |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
734 :type: Boolean |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
735 :param istmBetaMatrix: File that has a precalculated beta matrix |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
736 :type: File stream or File path string |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
737 :return Selected Samples: Samples selected by methods. |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
738 Dictionary {"Selection Method":["SampleID","SampleID","SampleID",...]} |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
739 """ |
0 | 740 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
741 # Sample ids/names |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
742 lsSampleNames = abndData.funcGetSampleNames() |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
743 |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
744 # Generate alpha metrics and get most diverse |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
745 if fRunDiversity: |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
746 |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
747 # Get Alpha metrics matrix |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
748 internalAlphaMatrix = None |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
749 # Name of technique |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
750 strMethod = [ |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
751 strAlphaMetadata] if strAlphaMetadata else lsAlphaMetrics |
0 | 752 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
753 # If given an alpha-diversity metadata |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
754 if strAlphaMetadata: |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
755 internalAlphaMatrix = [ |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
756 [float(strNum) for strNum in abndData.funcGetMetadata(strAlphaMetadata)]] |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
757 else: |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
758 # Expects Observations (Taxa (row) x sample (column)) |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
759 #Returns [[metric1-sample1, metric1-sample2, metric1-sample3],[metric1-sample1, metric1-sample2, metric1-sample3]] |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
760 internalAlphaMatrix = Metric.funcBuildAlphaMetricsMatrix(npaSampleAbundance=abndData.funcGetAbundanceCopy() |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
761 if not abndData.funcIsSummed() |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
762 else abndData.funcGetFeatureAbundanceTable(abndData.funcGetTerminalNodes()).funcGetAbundanceCopy(), |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
763 lsSampleNames=lsSampleNames, lsDiversityMetricAlpha=lsAlphaMetrics) |
0 | 764 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
765 if internalAlphaMatrix: |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
766 # Invert measurments |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
767 if fInvertDiversity: |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
768 lldNewDiversity = [] |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
769 for lsLine in internalAlphaMatrix: |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
770 lldNewDiversity.append( |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
771 [1/max(dValue, ConstantsMicropita.c_smallNumber) for dValue in lsLine]) |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
772 internalAlphaMatrix = lldNewDiversity |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
773 # Get top ranked alpha diversity by most diverse |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
774 # Expects [[sample1,sample2,sample3...],[sample1,sample2,sample3..],...] |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
775 #Returns [[sampleName1, sampleName2, sampleNameN],[sampleName1, sampleName2, sampleNameN]] |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
776 mostDiverseAlphaSamplesIndexes = self.funcGetTopRankedSamples( |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
777 lldMatrix=internalAlphaMatrix, lsSampleNames=lsSampleNames, iTopAmount=iSampleSelectionCount) |
0 | 778 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
779 # Add to results |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
780 for index in xrange(0, len(strMethod)): |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
781 strSelectionMethod = self.dictConvertAMetricDiversity.get( |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
782 strMethod[index], ConstantsMicropita.c_strDiversity+"="+strMethod[index]) |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
783 dictSelectedSamples.setdefault(strSelectionMethod, []).extend( |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
784 mostDiverseAlphaSamplesIndexes[index]) |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
785 |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
786 logging.info( |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
787 "MicroPITA.funcRunNormalizeSensitiveMethods:: Selected Samples 1b") |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
788 logging.info(dictSelectedSamples) |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
789 |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
790 # Generate beta metrics and |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
791 if fRunRepresentative or fRunExtreme: |
0 | 792 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
793 # Abundance matrix transposed |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
794 npaTransposedAbundance = UtilityMath.funcTransposeDataMatrix( |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
795 abndData.funcGetAbundanceCopy(), fRemoveAdornments=True) |
0 | 796 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
797 # Get center selection using clusters/tiling |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
798 # This will be for beta metrics in normalized space |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
799 if fRunRepresentative: |
0 | 800 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
801 if istmBetaMatrix: |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
802 # Get representative dissimilarity samples |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
803 medoidSamples = self.funcGetCentralSamplesByKMedoids(npaMatrix=npaTransposedAbundance, sMetric=ConstantsMicropita.c_custom, lsSampleNames=lsSampleNames, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
804 iNumberSamplesReturned=iSampleSelectionCount, istmBetaMatrix=istmBetaMatrix, istrmTree=istrmTree, istrmEnvr=istrmEnvr) |
0 | 805 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
806 if medoidSamples: |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
807 dictSelectedSamples.setdefault( |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
808 ConstantsMicropita.c_strRepresentative+"="+ConstantsMicropita.c_custom, []).extend(medoidSamples) |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
809 else: |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
810 logging.info( |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
811 "MicroPITA.funcRunNormalizeSensitiveMethods:: Performing representative selection on normalized data.") |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
812 for bMetric in lsBetaMetrics: |
0 | 813 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
814 # Get representative dissimilarity samples |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
815 medoidSamples = self.funcGetCentralSamplesByKMedoids(npaMatrix=npaTransposedAbundance, sMetric=bMetric, lsSampleNames=lsSampleNames, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
816 iNumberSamplesReturned=iSampleSelectionCount, istmBetaMatrix=istmBetaMatrix, istrmTree=istrmTree, istrmEnvr=istrmEnvr) |
0 | 817 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
818 if medoidSamples: |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
819 dictSelectedSamples.setdefault(self.dictConvertBMetricToMethod.get( |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
820 bMetric, ConstantsMicropita.c_strRepresentative+"="+bMetric), []).extend(medoidSamples) |
0 | 821 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
822 # Get extreme selection using clusters, tiling |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
823 if fRunExtreme: |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
824 logging.info( |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
825 "MicroPITA.funcRunNormalizeSensitiveMethods:: Performing extreme selection on normalized data.") |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
826 if istmBetaMatrix: |
0 | 827 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
828 # Samples for representative dissimilarity |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
829 # This involves inverting the distance metric, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
830 # Taking the dendrogram level of where the number cluster == the number of samples to select |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
831 # Returning a representative sample from each cluster |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
832 extremeSamples = self.funcSelectExtremeSamplesFromHClust(strBetaMetric=ConstantsMicropita.c_custom, npaAbundanceMatrix=npaTransposedAbundance, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
833 lsSampleNames=lsSampleNames, iSelectSampleCount=iSampleSelectionCount, istmBetaMatrix=istmBetaMatrix, istrmTree=istrmTree, istrmEnvr=istrmEnvr) |
0 | 834 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
835 # Add selected samples |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
836 if extremeSamples: |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
837 dictSelectedSamples.setdefault( |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
838 ConstantsMicropita.c_strExtreme+"="+ConstantsMicropita.c_custom, []).extend(extremeSamples) |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
839 |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
840 else: |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
841 # Run KMedoids with inverse custom distance metric in normalized space |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
842 for bMetric in lsInverseBetaMetrics: |
0 | 843 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
844 # Samples for representative dissimilarity |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
845 # This involves inverting the distance metric, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
846 # Taking the dendrogram level of where the number cluster == the number of samples to select |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
847 # Returning a representative sample from each cluster |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
848 extremeSamples = self.funcSelectExtremeSamplesFromHClust(strBetaMetric=bMetric, npaAbundanceMatrix=npaTransposedAbundance, lsSampleNames=lsSampleNames, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
849 iSelectSampleCount=iSampleSelectionCount, istmBetaMatrix=istmBetaMatrix, istrmTree=istrmTree, istrmEnvr=istrmEnvr) |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
850 |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
851 # Add selected samples |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
852 if extremeSamples: |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
853 dictSelectedSamples.setdefault(self.dictConvertInvBMetricToMethod.get( |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
854 bMetric, ConstantsMicropita.c_strExtreme+"="+bMetric), []).extend(extremeSamples) |
0 | 855 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
856 logging.info( |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
857 "MicroPITA.funcRunNormalizeSensitiveMethods:: Selected Samples 2,3b") |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
858 logging.info(dictSelectedSamples) |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
859 return dictSelectedSamples |
0 | 860 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
861 def funcRun(self, strIDName, strLastMetadataName, istmInput, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
862 ostmInputPredictFile, ostmPredictFile, ostmCheckedFile, ostmOutput, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
863 cDelimiter, cFeatureNameDelimiter, strFeatureSelection, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
864 istmFeatures, iCount, lstrMethods, strLastRowMetadata=None, strLabel=None, strStratify=None, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
865 strCustomAlpha=None, strCustomBeta=None, strAlphaMetadata=None, istmBetaMatrix=None, istrmTree=None, istrmEnvr=None, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
866 iMinSeqs=ConstantsMicropita.c_liOccurenceFilter[0], iMinSamples=ConstantsMicropita.c_liOccurenceFilter[1], fInvertDiversity=False): |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
867 """ |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
868 Manages the selection of samples given different metrics. |
0 | 869 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
870 :param strIDName: Sample Id metadata row |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
871 :type: String |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
872 :param strLastMetadataName: The id of the metadata positioned last in the abundance table. |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
873 :type: String metadata id. |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
874 :param istmInput: File to store input data to supervised methods. |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
875 :type: FileStream or String file path |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
876 :param ostmInputPredictFile: File to store distances from supervised methods. |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
877 :type: FileStream or String file path |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
878 :param ostmCheckedFile: File to store the AbundanceTable data after it is being checked. |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
879 :type: FileStream or String file path |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
880 :param ostmOutput: File to store sample selection by methods of interest. |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
881 :type: FileStream or String file path |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
882 :param cDelimiter: Delimiter of abundance table. |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
883 :type: Character Char (default TAB). |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
884 :param cFeatureNameDelimiter: Delimiter of the name of features (for instance if they contain consensus lineages indicating clades). |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
885 :type: Character (default |). |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
886 :param strFeatureSelection: Which method to use to select features in a targeted manner (Using average ranked abundance or average abundance). |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
887 :type: String (specific values indicated in ConstantsMicropita.lsTargetedFeatureMethodValues). |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
888 :param istmFeatures: File which holds the features of interest if using targeted feature methodology. |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
889 :type: FileStream or String file path |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
890 :param iCount: Number of samples to select in each methods, supervised methods select this amount per label if possible. |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
891 :type: Integer |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
892 :param lstrMethods: List of strings indicating selection techniques. |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
893 :type: List of string method names |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
894 :param strLabel: The metadata used for supervised labels. |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
895 :type: String |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
896 :param strStratify: The metadata used to stratify unsupervised data. |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
897 :type: String |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
898 :param strCustomAlpha: Custom alpha diversity metric |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
899 :type: String |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
900 :param strCustomBeta: Custom beta diversity metric |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
901 :type: String |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
902 :param strAlphaMetadata: Metadata id which is a diversity metric to use in highest diversity sampling |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
903 :type: String |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
904 :param istmBetaMatrix: File containing precalculated beta-diversity matrix for representative sampling |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
905 :type: FileStream or String file path |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
906 :param istrmTree: File containing tree for phylogenetic beta-diversity analysis |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
907 :type: FileStream or String file path |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
908 :param istrmEnvr: File containing environment for phylogenetic beta-diversity analysis |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
909 :type: FileStream or String file path |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
910 :param iMinSeqs: Minimum sequence in the occurence filter which filters all features not with a minimum number of sequences in each of a minimum number of samples. |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
911 :type: Integer |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
912 :param iMinSamples: Minimum sample count for the occurence filter. |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
913 :type: Integer |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
914 :param fInvertDiversity: When true will invert diversity measurements before using. |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
915 :type: boolean |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
916 :return Selected Samples: Samples selected by methods. |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
917 Dictionary {"Selection Method":["SampleID","SampleID","SampleID",...]} |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
918 """ |
0 | 919 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
920 # Holds the top ranked samples from different metrics |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
921 # dict[metric name] = [samplename,samplename...] |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
922 selectedSamples = dict() |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
923 |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
924 # If targeted feature selection is among the requested methods, a feature file must be given; otherwise log an error and do not run |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
925 if ConstantsMicropita.c_strFeature in lstrMethods: |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
926 if not istmFeatures: |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
927 logging.error( |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
928 "MicroPITA.funcRun:: Did not receive both the Targeted feature file and the feature selection method. MicroPITA did not run.") |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
929 return False |
0 | 930 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
931 # Diversity metrics to run |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
932 # Use custom metrics if specified |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
933 # Custom beta metrics set to normalized only, custom alpha metrics set to count only |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
934 diversityMetricsAlpha = [] if strCustomAlpha or strAlphaMetadata else [ |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
935 MicroPITA.c_strInverseSimpsonDiversity] |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
936 diversityMetricsBeta = [] if istmBetaMatrix else [ |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
937 strCustomBeta] if strCustomBeta else [MicroPITA.c_strBrayCurtisDissimilarity] |
0 | 938 # inverseDiversityMetricsBeta = [MicroPITA.c_strInvBrayCurtisDissimilarity] |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
939 diversityMetricsAlphaNoNormalize = [strAlphaMetadata] if strAlphaMetadata else [ |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
940 strCustomAlpha] if strCustomAlpha else [] |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
941 diversityMetricsBetaNoNormalize = [] |
0 | 942 # inverseDiversityMetricsBetaNoNormalize = [] |
943 | |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
944 # Targeted taxa |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
945 userDefinedTaxa = [] |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
946 |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
947 # Perform different flows flags |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
948 c_RUN_MAX_DIVERSITY_1 = ConstantsMicropita.c_strDiversity in lstrMethods |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
949 c_RUN_REPRESENTIVE_DISSIMILARITY_2 = ConstantsMicropita.c_strRepresentative in lstrMethods |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
950 c_RUN_MAX_DISSIMILARITY_3 = ConstantsMicropita.c_strExtreme in lstrMethods |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
951 c_RUN_RANK_AVERAGE_USER_4 = False |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
952 if ConstantsMicropita.c_strFeature in lstrMethods: |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
953 c_RUN_RANK_AVERAGE_USER_4 = True |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
954 if not istmFeatures: |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
955 logging.error( |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
956 "MicroPITA.funcRun:: No taxa file was given for taxa selection.") |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
957 return False |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
958 # Read in taxa list, break down to lines and filter out empty strings |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
959 userDefinedTaxa = filter(None, (s.strip() |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
960 for s in istmFeatures.readlines())) |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
961 c_RUN_RANDOM_5 = ConstantsMicropita.c_strRandom in lstrMethods |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
962 c_RUN_DISTINCT = ConstantsMicropita.c_strDistinct in lstrMethods |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
963 c_RUN_DISCRIMINANT = ConstantsMicropita.c_strDiscriminant in lstrMethods |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
964 |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
965 # Read in abundance data |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
966 # Abundance is a structured array. Samples (column) by Taxa (rows) with the taxa id row included as the column index=0 |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
967 # Abundance table object to read in and manage data |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
968 totalAbundanceTable = AbundanceTable.funcMakeFromFile(xInputFile=istmInput, lOccurenceFilter=[iMinSeqs, iMinSamples], |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
969 cDelimiter=cDelimiter, sMetadataID=strIDName, sLastMetadataRow=strLastRowMetadata, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
970 sLastMetadata=strLastMetadataName, cFeatureNameDelimiter=cFeatureNameDelimiter, xOutputFile=ostmCheckedFile) |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
971 if not totalAbundanceTable: |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
972 logging.error("MicroPITA.funcRun:: Could not read in the abundance table. Analysis was not performed." + |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
973 " This often occurs when the Last Metadata is not specified correctly." + |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
974 " Please check to make sure the Last Metadata selection is the row of the last metadata," + |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
975 " all values after this selection should be microbial measurements and should be numeric.") |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
976 return False |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
977 |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
978 lsOriginalLabels = SVM.funcMakeLabels( |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
979 totalAbundanceTable.funcGetMetadata(strLabel)) if strLabel else strLabel |
0 | 980 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
981 dictTotalMetadata = totalAbundanceTable.funcGetMetadataCopy() |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
982 logging.debug("MicroPITA.funcRun:: Received metadata=" + |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
983 str(dictTotalMetadata)) |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
984 # If there is only 1 unique value for the labels, do not run the Supervised methods |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
985 if strLabel and (len(set(dictTotalMetadata.get(strLabel, []))) < 2): |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
986 logging.error("The label " + strLabel + " did not have 2 or more values. Labels found=" + |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
987 str(dictTotalMetadata.get(strLabel, []))) |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
988 return False |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
989 |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
990 #Run unsupervised methods### |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
991 # Stratify the data if need be and drop the old data |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
992 lStratifiedAbundanceTables = totalAbundanceTable.funcStratifyByMetadata( |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
993 strStratify) if strStratify else [totalAbundanceTable] |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
994 |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
995 # For each stratified abundance block or for the unstratified abundance |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
996 # Run the unsupervised blocks |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
997 fAppendSupFiles = False |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
998 for stratAbundanceTable in lStratifiedAbundanceTables: |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
999 logging.info("MicroPITA.funcRun:: Running abundance block:" + |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1000 stratAbundanceTable.funcGetName()) |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1001 |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1002 # NOT SUMMED, NOT NORMALIZED |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1003 # Only perform if the data is not yet normalized |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1004 if not stratAbundanceTable.funcIsNormalized(): |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1005 # Need to first work with unnormalized data |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1006 if c_RUN_MAX_DIVERSITY_1 or c_RUN_REPRESENTIVE_DISSIMILARITY_2 or c_RUN_MAX_DISSIMILARITY_3: |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1007 |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1008 self._funcRunNormalizeSensitiveMethods(abndData=stratAbundanceTable, iSampleSelectionCount=iCount, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1009 dictSelectedSamples=selectedSamples, lsAlphaMetrics=diversityMetricsAlphaNoNormalize, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1010 lsBetaMetrics=diversityMetricsBetaNoNormalize, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1011 lsInverseBetaMetrics=diversityMetricsBetaNoNormalize, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1012 fRunDiversity=c_RUN_MAX_DIVERSITY_1, fRunRepresentative=c_RUN_REPRESENTIVE_DISSIMILARITY_2, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1013 fRunExtreme=c_RUN_MAX_DISSIMILARITY_3, strAlphaMetadata=strAlphaMetadata, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1014 istrmTree=istrmTree, istrmEnvr=istrmEnvr, fInvertDiversity=fInvertDiversity) |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1015 |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1016 # Generate selection by the rank average of user defined taxa |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1017 # Expects (Taxa (row) by Samples (column)) |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1018 # Expects a column 0 of taxa id that is skipped |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1019 # Returns [(sample name,average,rank)] |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1020 # SUMMED AND NORMALIZED |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1021 stratAbundanceTable.funcSumClades() |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1022 # Normalize data at this point |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1023 stratAbundanceTable.funcNormalize() |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1024 if c_RUN_RANK_AVERAGE_USER_4: |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1025 selectedSamples[ConstantsMicropita.c_strFeature] = self.funcSelectTargetedTaxaSamples(abndMatrix=stratAbundanceTable, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1026 lsTargetedTaxa=userDefinedTaxa, iSampleSelectionCount=iCount, sMethod=strFeatureSelection) |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1027 logging.info("MicroPITA.funcRun:: Selected Samples Rank") |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1028 logging.info(selectedSamples) |
0 | 1029 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1030 # SUMMED AND NORMALIZED analysis block |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1031 # Diversity based metric will move reduce to terminal taxa as needed |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1032 if c_RUN_MAX_DIVERSITY_1 or c_RUN_REPRESENTIVE_DISSIMILARITY_2 or c_RUN_MAX_DISSIMILARITY_3: |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1033 |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1034 self._funcRunNormalizeSensitiveMethods(abndData=stratAbundanceTable, iSampleSelectionCount=iCount, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1035 dictSelectedSamples=selectedSamples, lsAlphaMetrics=diversityMetricsAlpha, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1036 lsBetaMetrics=diversityMetricsBeta, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1037 lsInverseBetaMetrics=diversityMetricsBeta, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1038 fRunDiversity=c_RUN_MAX_DIVERSITY_1, fRunRepresentative=c_RUN_REPRESENTIVE_DISSIMILARITY_2, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1039 fRunExtreme=c_RUN_MAX_DISSIMILARITY_3, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1040 istmBetaMatrix=istmBetaMatrix, istrmTree=istrmTree, istrmEnvr=istrmEnvr, fInvertDiversity=fInvertDiversity) |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1041 |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1042 # 5::Select randomly |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1043 # Expects sampleNames = List of sample names [name, name, name...] |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1044 if(c_RUN_RANDOM_5): |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1045 # Select randomly from sample names |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1046 selectedSamples[ConstantsMicropita.c_strRandom] = self.funcGetRandomSamples( |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1047 lsSamples=stratAbundanceTable.funcGetSampleNames(), iNumberOfSamplesToReturn=iCount) |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1048 logging.info("MicroPITA.funcRun:: Selected Samples Random") |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1049 logging.info(selectedSamples) |
0 | 1050 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1051 # Perform supervised selection |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1052 if c_RUN_DISTINCT or c_RUN_DISCRIMINANT: |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1053 if strLabel: |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1054 dictSelectionRet = self.funcRunSupervisedDistancesFromCentroids(abundanceTable=stratAbundanceTable, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1055 fRunDistinct=c_RUN_DISTINCT, fRunDiscriminant=c_RUN_DISCRIMINANT, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1056 xOutputSupFile=ostmInputPredictFile, xPredictSupFile=ostmPredictFile, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1057 strSupervisedMetadata=strLabel, iSampleSupSelectionCount=iCount, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1058 lsOriginalSampleNames=totalAbundanceTable.funcGetSampleNames(), |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1059 lsOriginalLabels=lsOriginalLabels, |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1060 fAppendFiles=fAppendSupFiles) |
0 | 1061 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1062 [selectedSamples.setdefault(sKey, []).extend( |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1063 lValue) for sKey, lValue in dictSelectionRet.items()] |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1064 |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1065 if not fAppendSupFiles: |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1066 fAppendSupFiles = True |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1067 logging.info( |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1068 "MicroPITA.funcRun:: Selected Samples Unsupervised") |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1069 logging.info(selectedSamples) |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1070 return selectedSamples |
0 | 1071 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1072 # Testing: Happy path tested |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
@staticmethod
def funcWriteSelectionToFile(dictSelection, xOutputFilePath):
    """
    Writes the selection of samples by method to an output file.

    One delimited row is written per method: the method name followed by the
    samples selected by that method. The delimiter is
    ConstantsMicropita.c_outputFileDelim. When dictSelection is empty nothing
    is written and a path is not opened.

    :param dictSelection: The dictionary of selections by method to be written to a file.
    :type: Dictionary The dictionary of selections by method {"method":["sample selected","sample selected"...]}
    :param xOutputFilePath: FileStream or String path to file in which the dictionary is written.
    :type: String FileStream or String path to file
    """

    if not dictSelection:
        return

    # Only a handle opened here is closed here; a stream handed in by the
    # caller is left open so the caller keeps ownership of it.
    fOpenedHere = isinstance(xOutputFilePath, str)
    # newline="" lets the csv module control the line endings itself.
    ostmOutput = open(xOutputFilePath, "w", newline="") if fOpenedHere else xOutputFilePath

    try:
        f = csv.writer(ostmOutput, delimiter=ConstantsMicropita.c_outputFileDelim)

        # Create output content from dictionary
        for sKey, lsSamples in dictSelection.items():
            f.writerow([sKey] + lsSamples)
            logging.debug(
                "MicroPITA.funcRun:: Selected samples output to file:" + str(lsSamples))
    finally:
        # Closing flushes the rows to disk and releases the descriptor.
        if fOpenedHere:
            ostmOutput.close()
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1096 |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1097 # Testing: Happy Path tested |
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
@staticmethod
def funcReadSelectionFileToDictionary(xInputFile):
    """
    Reads in an output selection file from micropita and formats it into a dictionary.

    Each row is split on ConstantsMicropita.c_outputFileDelim; the first field
    becomes the key and the remaining fields become the list of samples.
    Blank lines are skipped. If the same first field occurs on several rows,
    the last row wins.

    :param xInputFile: String path to file or file stream to read and translate into a dictionary.
                        {"method":["sample selected","sample selected"...]}
    :type: FileStream or String Path to file
    :return Dictionary: Samples selected by methods.
                Dictionary {"Selection Method":["SampleID","SampleID","SampleID",...]}
    """

    # Only a handle opened here is closed here; a stream handed in by the
    # caller is left open so the caller keeps ownership of it.
    fOpenedHere = isinstance(xInputFile, str)
    # newline="" lets the csv module interpret the line endings itself.
    istmInput = open(xInputFile, 'r', newline="") if fOpenedHere else xInputFile

    try:
        istmReader = csv.reader(
            istmInput, delimiter=ConstantsMicropita.c_outputFileDelim)

        # csv.reader yields an empty list for a blank line; indexing it would
        # raise IndexError, so such rows are ignored.
        return {lsLine[0]: lsLine[1:] for lsLine in istmReader if lsLine}
    finally:
        if fOpenedHere:
            istmInput.close()
0 | 1116 |
1117 | |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
# Set up arguments reader
# The parser is built at import time so its help text can be appended to the
# module docstring below and so _main() can call argp.parse_args().
argp = argparse.ArgumentParser(prog="MicroPITA.py",
                               description="""Selects samples from abundance tables based on various selection schemes.""")

# Common options: number of samples to select and which methods to run.
args = argp.add_argument_group("Common", "Commonly modified options")
args.add_argument(ConstantsMicropita.c_strCountArgument, "--num", dest="iCount",
                  metavar="samples", default=10, type=int, help=ConstantsMicropita.c_strCountHelp)
# action="append" collects every -m/--method occurrence into one list.
args.add_argument("-m", "--method", dest="lstrMethods", metavar="method", default=[], help=ConstantsMicropita.c_strSelectionTechniquesHelp,
                  choices=ConstantsMicropita.c_lsAllMethods, action="append")

# Custom metrics and the optional inputs that go with them.
args = argp.add_argument_group(
    "Custom", "Selecting and inputing custom metrics")
args.add_argument("-a", "--alpha", dest="strAlphaDiversity", metavar="AlphaDiversity", default=None,
                  help=ConstantsMicropita.c_strCustomAlphaDiversityHelp, choices=Metric.setAlphaDiversities)
args.add_argument("-b", "--beta", dest="strBetaDiversity", metavar="BetaDiversity", default=None, help=ConstantsMicropita.c_strCustomBetaDiversityHelp,
                  choices=list(Metric.setBetaDiversities)+[Metric.c_strUnifracUnweighted, Metric.c_strUnifracWeighted])
args.add_argument("-q", "--alphameta", dest="strAlphaMetadata", metavar="AlphaDiversityMetadata",
                  default=None, help=ConstantsMicropita.c_strCustomAlphaDiversityMetadataHelp)
args.add_argument("-x", "--betamatrix", dest="istmBetaMatrix", metavar="BetaDiversityMatrix",
                  default=None, help=ConstantsMicropita.c_strCustomBetaDiversityMatrixHelp)
args.add_argument("-o", "--tree", dest="istrmTree", metavar="PhylogeneticTree",
                  default=None, help=ConstantsMicropita.c_strCustomPhylogeneticTreeHelp)
args.add_argument("-i", "--envr", dest="istrmEnvr", metavar="EnvironmentFile",
                  default=None, help=ConstantsMicropita.c_strCustomEnvironmentFileHelp)
args.add_argument("-f", "--invertDiversity", dest="fInvertDiversity", action="store_true",
                  default=False, help=ConstantsMicropita.c_strInvertDiversityHelp)

# Row/column identifiers and targeted-feature options.
args = argp.add_argument_group(
    "Miscellaneous", "Row/column identifiers and feature targeting options")
args.add_argument("-d", ConstantsMicropita.c_strIDNameArgument, dest="strIDName",
                  metavar="sample_id", help=ConstantsMicropita.c_strIDNameHelp)
args.add_argument("-l", ConstantsMicropita.c_strLastMetadataNameArgument, dest="strLastMetadataName", metavar="metadata_id", default=None,
                  help=ConstantsMicropita.c_strLastMetadataNameHelp)
args.add_argument("-r", ConstantsMicropita.c_strTargetedFeatureMethodArgument, dest="strFeatureSelection", metavar="targeting_method", default=ConstantsMicropita.lsTargetedFeatureMethodValues[0],
                  choices=ConstantsMicropita.lsTargetedFeatureMethodValues, help=ConstantsMicropita.c_strTargetedFeatureMethodHelp)
# Mode "r" rather than "rU": the "U" flag is rejected by open() on
# Python 3.11+, and universal newlines are already the text-mode default.
args.add_argument("-t", ConstantsMicropita.c_strTargetedSelectionFileArgument, dest="istmFeatures",
                  metavar="feature_file", type=argparse.FileType("r"), help=ConstantsMicropita.c_strTargetedSelectionFileHelp)
args.add_argument("-w", ConstantsMicropita.c_strFeatureMetadataArgument, dest="strLastFeatureMetadata",
                  metavar="Last_Feature_Metadata", default=None, help=ConstantsMicropita.c_strFeatureMetadataHelp)

# Metadata ids used for supervised labels and for stratification.
args = argp.add_argument_group(
    "Data labeling", "Metadata IDs for strata and supervised label values")
args.add_argument("-e", ConstantsMicropita.c_strSupervisedLabelArgument, dest="strLabel",
                  metavar="supervised_id", help=ConstantsMicropita.c_strSupervisedLabelHelp)
args.add_argument("-s", ConstantsMicropita.c_strUnsupervisedStratifyMetadataArgument, dest="strUnsupervisedStratify", metavar="stratify_id",
                  help=ConstantsMicropita.c_strUnsupervisedStratifyMetadataHelp)

# Delimiters of the input file and of the feature (taxonomy) names.
args = argp.add_argument_group(
    "File formatting", "Rarely modified file formatting options")
args.add_argument("-j", ConstantsMicropita.c_strFileDelimiterArgument, dest="cFileDelimiter",
                  metavar="column_delimiter", default="\t", help=ConstantsMicropita.c_strFileDelimiterHelp)
args.add_argument("-k", ConstantsMicropita.c_strFeatureNameDelimiterArgument, dest="cFeatureNameDelimiter",
                  metavar="taxonomy_delimiter", default="|", help=ConstantsMicropita.c_strFeatureNameDelimiterHelp)

# Logging level and optional diagnostic output files.
args = argp.add_argument_group(
    "Debugging", "Debugging options - modify at your own risk!")
args.add_argument("-v", ConstantsMicropita.c_strLoggingArgument, dest="strLogLevel", metavar="log_level", default="WARNING",
                  choices=ConstantsMicropita.c_lsLoggingChoices, help=ConstantsMicropita.c_strLoggingHelp)
args.add_argument("-c", ConstantsMicropita.c_strCheckedAbundanceFileArgument, dest="ostmCheckedFile",
                  metavar="output_qc", type=argparse.FileType("w"), help=ConstantsMicropita.c_strCheckedAbundanceFileHelp)
args.add_argument("-g", ConstantsMicropita.c_strLoggingFileArgument, dest="ostmLoggingFile",
                  metavar="output_log", type=argparse.FileType("w"), help=ConstantsMicropita.c_strLoggingFileHelp)
args.add_argument("-u", ConstantsMicropita.c_strSupervisedInputFile, dest="ostmInputPredictFile",
                  metavar="output_scaled", type=argparse.FileType("w"), help=ConstantsMicropita.c_strSupervisedInputFileHelp)
args.add_argument("-p", ConstantsMicropita.c_strSupervisedPredictedFile, dest="ostmPredictFile",
                  metavar="output_labels", type=argparse.FileType("w"), help=ConstantsMicropita.c_strSupervisedPredictedFileHelp)

# Positional input and output files.
# NOTE(review): these positionals have no nargs="?", so the default= values
# look like they are never used - confirm before relying on stdin/stdout.
argp.add_argument("istmInput", metavar="input.pcl/biome", type=argparse.FileType("r"), help=ConstantsMicropita.c_strAbundanceFileHelp,
                  default=sys.stdin)
argp.add_argument("ostmOutput", metavar="output.txt", type=argparse.FileType("w"), help=ConstantsMicropita.c_strGenericOutputDataFileHelp,
                  default=sys.stdout)

# Prepend the tab-indented command line help to the module docstring.
# __doc__ is None when docstrings are stripped (python -OO), so fall back to
# an empty string instead of failing at import time.
__doc__ = "::\n\n\t" + argp.format_help().replace("\n", "\n\t") + (__doc__ or "")
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
1191 |
0 | 1192 |
28
1d09ffab87a7
Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents:
16
diff
changeset
|
def _main():
    """
    Command line entry point for microPITA.

    Parses the command line with the module-level parser, configures logging,
    runs the sample selection and writes the selection to the output stream.

    :return: -1 when the analysis produced no result; otherwise nothing
             (implicit None).
    """
    opts = argp.parse_args()

    # Logging goes to the requested log file when one was given, else stderr.
    # An unrecognised level name resolves to None.
    log_level = getattr(logging, opts.strLogLevel.upper(), None)
    log_stream = opts.ostmLoggingFile or sys.stderr
    logging.basicConfig(stream=log_stream, filemode='w', level=log_level)

    logging.info("MicroPITA:: Start microPITA")
    runner = MicroPITA()

    # Per the original author: argparse appends to a default list rather than
    # replacing it, so the fallback selection method is applied here, only
    # when no method was requested.
    if len(opts.lstrMethods) == 0:
        opts.lstrMethods = [ConstantsMicropita.c_strRepresentative]

    # Map the parsed command line options onto funcRun's keyword arguments.
    run_options = dict(
        strIDName=opts.strIDName,
        strLastMetadataName=opts.strLastMetadataName,
        istmInput=opts.istmInput,
        ostmInputPredictFile=opts.ostmInputPredictFile,
        ostmPredictFile=opts.ostmPredictFile,
        ostmCheckedFile=opts.ostmCheckedFile,
        ostmOutput=opts.ostmOutput,
        cDelimiter=opts.cFileDelimiter,
        cFeatureNameDelimiter=opts.cFeatureNameDelimiter,
        istmFeatures=opts.istmFeatures,
        strFeatureSelection=opts.strFeatureSelection,
        iCount=opts.iCount,
        strLastRowMetadata=opts.strLastFeatureMetadata,
        strLabel=opts.strLabel,
        strStratify=opts.strUnsupervisedStratify,
        strCustomAlpha=opts.strAlphaDiversity,
        strCustomBeta=opts.strBetaDiversity,
        strAlphaMetadata=opts.strAlphaMetadata,
        istmBetaMatrix=opts.istmBetaMatrix,
        istrmTree=opts.istrmTree,
        istrmEnvr=opts.istrmEnvr,
        lstrMethods=opts.lstrMethods,
        fInvertDiversity=opts.fInvertDiversity,
    )
    selection = runner.funcRun(**run_options)

    # A falsy result is treated as a failed analysis.
    if not selection:
        logging.error("MicroPITA:: Error, did not get a result from analysis.")
        return -1
    logging.info("End microPITA")

    # Log output for debugging
    logging.debug("MicroPITA:: Returned the following samples:" + str(selection))

    # Write selection to file
    runner.funcWriteSelectionToFile(dictSelection=selection, xOutputFilePath=opts.ostmOutput)
0 | 1247 |
1248 | |
if __name__ == "__main__":
    # Use _main()'s return value as the process exit status: it returns -1
    # when the analysis yields no result and None (exit status 0) otherwise.
    # Previously the return value was discarded, so a failed run still
    # exited with status 0.
    sys.exit(_main())