annotate MicroPITA.py @ 32:041787cd0d31 draft default tip

Modified from StringIO import StringIO ## for Python 2 to from io import StringIO ## for Python 3
author george-weingart
date Wed, 23 Jun 2021 20:52:58 +0000
parents 1d09ffab87a7
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1 #!/usr/bin/env python
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
2 """
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
3 Author: Timothy Tickle
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
4 Description: Class to Run analysis for the microPITA paper
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
5 """
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
6
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
7 #####################################################################################
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
8 # Copyright (C) <2012>
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
9 #
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
10 # Permission is hereby granted, free of charge, to any person obtaining a copy of
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
11 # this software and associated documentation files (the "Software"), to deal in the
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
12 # Software without restriction, including without limitation the rights to use, copy,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
13 # modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
14 # and to permit persons to whom the Software is furnished to do so, subject to
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
15 # the following conditions:
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
16 #
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
17 # The above copyright notice and this permission notice shall be included in all copies
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
18 # or substantial portions of the Software.
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
19 #
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
20 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
21 # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
22 # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
23 # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
24 # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
25 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
26 #####################################################################################
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
27
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
28 from types import *
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
29 import scipy.spatial.distance
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
30 import scipy.cluster.hierarchy as hcluster
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
31 import random
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
32 import os
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
33 import operator
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
34 import numpy as np
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
35 import mlpy
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
36 import math
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
37 import logging
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
38 import csv
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
39 from src.ConstantsMicropita import ConstantsMicropita
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
40 from src.breadcrumbs.src.UtilityMath import UtilityMath
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
41 from src.breadcrumbs.src.SVM import SVM
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
42 from src.breadcrumbs.src.MLPYDistanceAdaptor import MLPYDistanceAdaptor
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
43 from src.breadcrumbs.src.KMedoids import Kmedoids
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
44 from src.breadcrumbs.src.Metric import Metric
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
45 from src.breadcrumbs.src.ConstantsBreadCrumbs import ConstantsBreadCrumbs
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
46 from src.breadcrumbs.src.AbundanceTable import AbundanceTable
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
47 __author__ = "Timothy Tickle"
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
48 __copyright__ = "Copyright 2012"
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
49 __credits__ = ["Timothy Tickle"]
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
50 __license__ = "MIT"
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
51 __maintainer__ = "Timothy Tickle"
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
52 __email__ = "ttickle@sph.harvard.edu"
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
53 __status__ = "Development"
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
54
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
55 import sys
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
56 import argparse
16
7d25ecd225dd Updated Micropita.py to suppres future warnings as this was causing a problem inn Galaxy
george.weingart@gmail.com
parents: 0
diff changeset
57 import warnings
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
58 warnings.simplefilter(action="ignore", category=FutureWarning)
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
59
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
60
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
61 class MicroPITA:
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
62 """
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
63 Selects samples from a first tier of a multi-tiered study to be used in a second tier.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
64 Different methods can be used for selection.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
65 The expected input is an abundance table (and potentially a text file of targeted features,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
66 if using the targeted features option). Output is a list of samples exhibiting the
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
67 characteristics of interest.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
68 """
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
69
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
70 # Constants
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
71 # Diversity metrics Alpha
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
72 c_strInverseSimpsonDiversity = Metric.c_strInvSimpsonDiversity
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
73 c_strChao1Diversity = Metric.c_strChao1Diversity
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
74
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
75 # Diversity metrics Beta
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
76 c_strBrayCurtisDissimilarity = Metric.c_strBrayCurtisDissimilarity
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
77
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
78 # Additive inverses of diversity metrics beta
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
79 c_strInvBrayCurtisDissimilarity = Metric.c_strInvBrayCurtisDissimilarity
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
80
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
81 # Technique Names
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
82 ConstantsMicropita.c_strDiversity2 = ConstantsMicropita.c_strDiversity+"_C"
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
83
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
84 # Targeted feature settings
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
85 c_strTargetedRanked = ConstantsMicropita.c_strTargetedRanked
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
86 c_strTargetedAbundance = ConstantsMicropita.c_strTargetedAbundance
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
87
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
88 # Technique groupings
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
89 # c_lsDiversityMethods = [ConstantsMicropita.c_strDiversity,ConstantsMicropita.c_strDiversity2]
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
90
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
91 # Converts ecology metrics into standardized method selection names
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
92 dictConvertAMetricDiversity = {c_strInverseSimpsonDiversity: ConstantsMicropita.c_strDiversity,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
93 c_strChao1Diversity: ConstantsMicropita.c_strDiversity2}
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
94 # dictConvertMicroPITAToAMetric = {ConstantsMicropita.c_strDiversity:c_strInverseSimpsonDiversity, ConstantsMicropita.c_strDiversity2:c_strChao1Diversity}
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
95 dictConvertBMetricToMethod = {
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
96 c_strBrayCurtisDissimilarity: ConstantsMicropita.c_strRepresentative}
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
97 dictConvertInvBMetricToMethod = {
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
98 c_strBrayCurtisDissimilarity: ConstantsMicropita.c_strExtreme}
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
99
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
100 # Linkage used in the Hierarchical clustering
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
101 c_strHierarchicalClusterMethod = 'average'
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
102
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
103 # Group 1## Diversity
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
104 # Testing: Happy path Testing (8)
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
def funcGetTopRankedSamples(self, lldMatrix=None, lsSampleNames=None, iTopAmount=None):
    """
    Given a list of lists of measurements, for each list the indices of the highest values are returned.
    If lsSampleNames is given it is treated as a list of string names that is in the order of the
    measurements in each list, and the sample names associated with the indices are returned instead
    of the indices themselves.

    :param lldMatrix: List of lists [[value,value,value,value],[value,value,value,value]].
    :type: List of lists List of measurements. Each list is a different measurement. Each measurement is positionally related to a sample.
    :param lsSampleNames: List of sample names positionally related (the same) to each list (Optional).
    :type: List of strings List of strings.
    :param iTopAmount: The amount of top measured samples (assumes the higher measurements are better).
                       None returns every sample, ranked.
    :type: integer Integer amount of sample names/ indices to return.
    :return List: One list per measurement list, holding the selected sample names (or indices when
                  no names are given), ordered from highest to lowest measurement.
                  An empty list is returned when lldMatrix is None.
    """
    topRankListRet = []

    # The parameter defaults to None; there is nothing to rank in that case.
    if lldMatrix is None:
        return topRankListRet

    for rowMetrics in lldMatrix:
        # Rank the positions of this row by their measurement, highest first.
        # sorted() is required here: in Python 3 range() is not a list and has no
        # sort() method. The sort is stable, so tied measurements keep their
        # original relative order.
        liRankedIndices = sorted(range(len(rowMetrics)),
                                 key=rowMetrics.__getitem__, reverse=True)[:iTopAmount]

        if lsSampleNames:
            topRankListRet.append([lsSampleNames[iIndex]
                                   for iIndex in liRankedIndices])
        else:
            topRankListRet.append(liRankedIndices)

    return topRankListRet
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
132
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
133 # Group 2## Representative Dissimilarity
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
134 # Testing: Happy path tested 1
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
def funcGetCentralSamplesByKMedoids(self, npaMatrix=None, sMetric=None, lsSampleNames=None, iNumberSamplesReturned=0, istmBetaMatrix=None, istrmTree=None, istrmEnvr=None):
    """
    Gets centroid samples by k-medoids clustering of a given matrix.

    :param npaMatrix: Numpy array of abundance data. The length of its first axis is used as the sample count.
    :type: Numpy array Abundance Data.
    :param sMetric: String name of beta metric used as the distance metric.
    :type: String String name of beta metric.
    :param lsSampleNames: The names of the samples, positionally related to the first axis of npaMatrix.
    :type: List List of strings
    :param iNumberSamplesReturned: Number of samples to return, each will be a centroid of a cluster.
    :type: Integer Number of samples to return
    :param istmBetaMatrix: File with beta-diversity matrix. When given, it is read instead of
                           computing the distances from npaMatrix.
    :type: File stream or file path string
    :param istrmTree: Passed through unchanged to Metric.funcGetBetaMetric.
    :param istrmEnvr: Passed through unchanged to Metric.funcGetBetaMetric.
    :return List: List of selected sample names. False is returned when more samples are requested
                  than exist or when the distance matrix could not be obtained.
    """

    # Count of how many samples (first axis of the matrix)
    sampleCount = npaMatrix.shape[0]
    if iNumberSamplesReturned > sampleCount:
        logging.error("MicroPITA.funcGetCentralSamplesByKMedoids:: There are not enough samples to return the amount of samples specified. Return sample count = " +
                      str(iNumberSamplesReturned)+". Sample number = "+str(sampleCount)+".")
        return False

    # If the cluster count is equal to the sample count return all samples
    if sampleCount == iNumberSamplesReturned:
        return list(lsSampleNames)

    # Get distance matrix: read it from the supplied file when available,
    # otherwise compute it from the abundance data.
    if istmBetaMatrix:
        # NOTE(review): assumes funcReadMatrixFile returns an indexable result whose
        # first element is a square matrix - confirm its failure return value.
        distanceMatrix = scipy.spatial.distance.squareform(
            Metric.funcReadMatrixFile(istmMatrixFile=istmBetaMatrix, lsSampleOrder=lsSampleNames)[0])
    else:
        distanceMatrix = Metric.funcGetBetaMetric(
            npadAbundancies=npaMatrix, sMetric=sMetric, istrmTree=istrmTree, istrmEnvr=istrmEnvr, lsSampleOrder=lsSampleNames)

    # A boolean result signals failure. isinstance() replaces the Python 2 only
    # types.BooleanType, which no longer exists in Python 3.
    if isinstance(distanceMatrix, bool):
        logging.error(
            "MicroPITA.funcGetCentralSamplesByKMedoids:: Could not read in the supplied distance matrix, returning false.")
        return False

    # Handle unifrac output (the distances are the first element of the result)
    if sMetric in [Metric.c_strUnifracUnweighted, Metric.c_strUnifracWeighted]:
        distanceMatrix = distanceMatrix[0]

    # Log distance matrix
    logging.debug(
        "MicroPITA.funcGetCentralSamplesByKMedoids:: Distance matrix for representative selection using metric="+str(sMetric))

    distance = MLPYDistanceAdaptor(
        npaDistanceMatrix=distanceMatrix, fIsCondensedMatrix=True)

    # Create object to determine clusters/medoids
    medoidsMaker = Kmedoids(k=iNumberSamplesReturned, dist=distance)
    # medoidsData includes(1d numpy array, medoids indexes;
    #              1d numpy array, non-medoids indexes;
    #              1d numpy array, cluster membership for non-medoids;
    #              double, cost of configuration)
    # Build a matrix of single-element lists of indices to pass to the distance matrix;
    # each "observation" handed to k-medoids is just the index of a sample.
    # range() replaces the Python 2 only xrange().
    lliIndicesMatrix = [[iIndexPosition]
                        for iIndexPosition in range(0, len(npaMatrix))]
    medoidsData = medoidsMaker.compute(np.array(lliIndicesMatrix))
    logging.debug(
        "MicroPITA.funcGetCentralSamplesByKMedoids:: Results from the kmedoid method in representative selection:")
    logging.debug(str(medoidsData))

    # The medoid indexes are the selected samples; translate them to sample names.
    selectedIndexes = medoidsData[0]
    return [lsSampleNames[selectedIndexes[index]] for index in range(0, iNumberSamplesReturned)]
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
201
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
202 # Group 3## Highest Dissimilarity
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
203 # Testing: Happy path tested
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
def funcSelectExtremeSamplesFromHClust(self, strBetaMetric, npaAbundanceMatrix, lsSampleNames, iSelectSampleCount, istmBetaMatrix=None, istrmTree=None, istrmEnvr=None):
    """
    Select extreme samples from HClustering.

    A condensed distance matrix is either read from istmBetaMatrix (when given) or
    computed with Metric.funcGetBetaMetric, then clustered with hcluster.linkage.
    The linkage rows are walked in order and the terminal nodes (samples) of each
    merge are collected until iSelectSampleCount names have been gathered.

    :param strBetaMetric: The beta metric to use for distance matrix generation.
    :type: String The name of the beta metric to use.
    :param npaAbundanceMatrix: Numpy array where row=samples and columns=features.
    :type: Numpy Array Abundance data.
    :param lsSampleNames: The names of the sample.
    :type: List List of strings.
    :param iSelectSampleCount: Number of samples to select (return).
    :type: Integer Integer number of samples returned.
    :param istmBetaMatrix: File with beta-diversity matrix
    :type: File stream or file path string
    :param istrmTree: Passed through unchanged to Metric.funcGetBetaMetric.
    :param istrmEnvr: Passed through unchanged to Metric.funcGetBetaMetric.
    :return Samples: List of samples. lsSampleNames itself is returned when every
                     sample is requested; False is returned when the distance
                     matrix could not be obtained.
    """

    # If they want all the sample count, return all sample names
    iSampleCount = len(npaAbundanceMatrix[:, 0])
    if iSelectSampleCount == iSampleCount:
        return lsSampleNames

    # Holds the samples to be returned
    lsReturnSamplesRet = []

    # Generate beta matrix
    # Returns condensed matrix
    if istmBetaMatrix:
        tempDistanceMatrix = scipy.spatial.distance.squareform(
            Metric.funcReadMatrixFile(istmMatrixFile=istmBetaMatrix, lsSampleOrder=lsSampleNames)[0])
    else:
        tempDistanceMatrix = Metric.funcGetBetaMetric(
            npadAbundancies=npaAbundanceMatrix, sMetric=strBetaMetric, istrmTree=istrmTree,
            istrmEnvr=istrmEnvr, lsSampleOrder=lsSampleNames, fAdditiveInverse=True)

    # The unifrac metrics need their first element unpacked. Skip the unpack when a
    # boolean failure flag came back so the error is logged below instead of raising
    # a TypeError on False[0].
    # NOTE(review): this unpack also runs when the matrix was read from istmBetaMatrix;
    # confirm that is intended.
    fIsUnifrac = strBetaMetric in [Metric.c_strUnifracUnweighted, Metric.c_strUnifracWeighted]
    if fIsUnifrac and not isinstance(tempDistanceMatrix, bool):
        tempDistanceMatrix = tempDistanceMatrix[0]

    # isinstance(..., bool) replaces the Python 2-only types.BooleanType check
    if isinstance(tempDistanceMatrix, bool):
        logging.error(
            "MicroPITA.funcSelectExtremeSamplesFromHClust:: Could not read in the supplied distance matrix, returning false.")
        return False

    # A matrix read from file is inverted here; the computed one was already
    # requested with fAdditiveInverse=True.
    if istmBetaMatrix:
        tempDistanceMatrix = 1-tempDistanceMatrix

    # Feed beta matrix to linkage to cluster
    # Send condensed matrix
    linkageMatrix = hcluster.linkage(
        tempDistanceMatrix, method=self.c_strHierarchicalClusterMethod)

    # Extract cluster information from dendrogram
    # The linkage matrix is of the form
    # [[int1 int2 double int3],...]
    # int1 and int2 are the paired samples indexed at 0 and up.
    # each list is an entry for a branch that is numbered starting with the first
    # list being sample count index + 1
    # each list is then named by an increment as they appear
    # this means that if a number is in the list and is = sample count or greater it is not
    # terminal and is instead a branch.
    # This method just takes the lowest metric measurement (highest distance pairs/clusters)
    # Works much better than the original technique
    for row in linkageMatrix:
        # Get nodes of the lowest pairing (so the furthest apart pair)
        # Add each of the pair one at a time, stopping when enough samples are selected.
        for iNode in (int(row[0]), int(row[1])):
            # Only terminal nodes (samples) are kept; branch ids start at the sample count.
            if iNode < iSampleCount:
                lsReturnSamplesRet.append(lsSampleNames[iNode])
                if len(lsReturnSamplesRet) == iSelectSampleCount:
                    return lsReturnSamplesRet

    # Return selected samples
    return lsReturnSamplesRet
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
284
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
285 # Group 4## Rank Average of user Defined Taxa
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
286 # Testing: Happy Path Tested
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
def funcGetAverageAbundanceSamples(self, abndTable, lsTargetedFeature, fRank=False):
    """
    Averages feature abundance or ranked abundance. Expects a column 0 of taxa id that is skipped.

    :param abndTable: Abundance Table to analyse
    :type: AbundanceTable Abundance Table
    :param lsTargetedFeature: String names
    :type: list list of string names of features (bugs) which are measured after ranking against the full sample
    :param fRank: Indicates to rank the abundance before getting the average abundance of the features (default false)
    :type: boolean Flag indicating ranking abundance before calculating average feature measurement (false= no ranking)
    :return List of lists or boolean: List of lists or False on error. One internal list per sample indicating the sample,
                feature average abundance or ranked abundance. Lists will already be sorted.
                For not Ranked [[sample, -1, average abundance of selected feature]]
                For Ranked [[sample, average ranked abundance, average abundance of selected feature]]
                Error Returns false
    """

    llAbundance = abndTable.funcGetAverageAbundancePerSample(lsTargetedFeature)
    if not llAbundance:
        logging.error("MicroPITA.funcGetAverageAbundanceSamples:: Could not get average abundance, returned false. Make sure the features (bugs) are spelled correctly and in the abundance table.")
        return False

    # Add a space for ranking if needed
    # Not ranked will be [[sSample, -1, average abundance]]
    # (the constant -1 cannot discriminate ties if used in later functions, so this generalizes)
    # Ranked will be [[sSample, average rank, average abundance]]
    llRetAbundance = [[llist[0], -1, llist[1]] for llist in llAbundance]

    # Rank if needed
    if fRank:
        abndRanked = abndTable.funcRankAbundance()
        # Identity test: "== None" would go through any custom __eq__ on the table
        if abndRanked is None:
            logging.error(
                "MicroPITA.funcGetAverageAbundanceSamples:: Could not rank the abundance table, returned false.")
            return False
        llRetRank = abndRanked.funcGetAverageAbundancePerSample(lsTargetedFeature)
        if not llRetRank:
            logging.error(
                "MicroPITA.funcGetAverageAbundanceSamples:: Could not get average ranked abundance, returned false. Make sure the features (bugs) are spelled correctly and in the abundance table.")
            return False
        # Fill the rank slot by looking each sample up by name
        dictRanks = dict(llRetRank)
        llRetAbundance = [[a[0], dictRanks[a[0]], a[2]] for a in llRetAbundance]

    # Sort first for ties and then for the main feature.
    # sorted() is stable, so the ordering produced by the first sort survives
    # among equal keys of the second sort.
    # NOTE(review): when ranked, the tie-break sort on abundance is ascending
    # (reverse=not fRank is False) - confirm this is the intended tie-break direction.
    if not fRank or ConstantsMicropita.c_fBreakRankTiesByDiversity:
        llRetAbundance = sorted(
            llRetAbundance, key=lambda sampleData: sampleData[2], reverse=not fRank)
    if fRank:
        llRetAbundance = sorted(
            llRetAbundance, key=lambda sampleData: sampleData[1], reverse=not fRank)
    return llRetAbundance
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
339
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
340 # Testing: Happy Path Tested
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
def funcSelectTargetedTaxaSamples(self, abndMatrix, lsTargetedTaxa, iSampleSelectionCount, sMethod=ConstantsMicropita.lsTargetedFeatureMethodValues[0]):
    """
    Selects samples with the highest ranks or abundance of targeted features.
    If ranked, select the highest abundance for tie breaking

    :param abndMatrix: Abundance table to analyse
    :type: AbundanceTable Abundance table
    :param lsTargetedTaxa: List of features
    :type: list list of strings
    :param iSampleSelectionCount: Number of samples to select
    :type: integer integer
    :param sMethod: Method to select targeted features
    :type: string String (Can be values found in ConstantsMicropita.lsTargetedFeatureMethodValues)
    :return List of strings: List of sample names which were selected
            List of strings Empty list is returned on an error.
    """

    # Check data (a missing list is treated the same as an empty one)
    if lsTargetedTaxa is None or len(lsTargetedTaxa) < 1:
        logging.error(
            "MicroPITA.funcSelectTargetedTaxaSamples. Taxa defined selection was requested but no features were given.")
        return []

    # Ranking is used only when sMethod matches the ranked keyword (case-insensitive)
    fRanked = sMethod.lower() == self.c_strTargetedRanked.lower()
    lsTargetedSamples = self.funcGetAverageAbundanceSamples(
        abndTable=abndMatrix, lsTargetedFeature=lsTargetedTaxa, fRank=fRanked)

    # funcGetAverageAbundanceSamples signals an error by returning False
    if lsTargetedSamples is False:
        logging.error("MicroPITA.funcSelectTargetedTaxaSamples:: Was not able to select for the features given. So targeted feature selection was not performed. Check to make sure the features are spelled correctly and exist in the abundance file.")
        return []

    # Select from results: the list is already sorted, so keep the leading sample names
    return [sSample[0] for sSample in lsTargetedSamples[:iSampleSelectionCount]]
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
373
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
374 # Group 5## Random
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
375 # Testing: Happy path Tested
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
def funcGetRandomSamples(self, lsSamples=None, iNumberOfSamplesToReturn=0):
    """
    Returns random sample names of the number given. No replacement.

    :param lsSamples: List of sample names
    :type: list list of strings
    :param iNumberOfSamplesToReturn: Number of samples to select
    :type: integer integer.
    :return List: List of selected samples (strings), in the order they appear
                  in lsSamples. lsSamples itself is returned when at least as
                  many samples are requested as exist; an empty list is
                  returned when lsSamples is None.
    """

    # The default lsSamples=None would otherwise fail in len()
    if lsSamples is None:
        return []

    # Input matrix sample count
    sampleCount = len(lsSamples)

    # Return the full list if they ask for as many samples as exist (or more)
    if iNumberOfSamplesToReturn >= sampleCount:
        return lsSamples

    # Get the random indices for the sample (without replacement).
    # Kept as a set so the membership test below is constant time.
    setiRandomIndices = set(random.sample(
        range(sampleCount), iNumberOfSamplesToReturn))

    # Keep the samples whose index was drawn, preserving input order
    return [sSample for iIndex, sSample in enumerate(lsSamples) if iIndex in setiRandomIndices]
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
400
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
401 # Happy path tested (case 3)
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
def funcGetAveragePopulation(self, abndTable, lfCompress):
    """
    Average every row of the abundance table over the selected samples.

    :param abndTable: AbundanceTable of data to be averaged
    :type: AbundanceTable
    :param lfCompress: One flag per sample; a false flag removes that sample before averaging
    :type: List of booleans
    :return List of doubles: One average per row of the table, or an empty list when no sample is flagged.
    """
    # Nothing is flagged, so there is nothing to average
    iFlagged = sum(lfCompress)
    if iFlagged == 0:
        return []

    def _funcAverageRow(row):
        # The leading element of each row is dropped before averaging
        # (presumably the feature id -- confirm against AbundanceTable)
        ldKept = np.compress(lfCompress, list(row)[1:], axis=0)
        return sum(ldKept)/float(len(ldKept))

    return [_funcAverageRow(row) for row in abndTable.funcGetAbundanceCopy()]
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
423
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
424 # Happy path tested (2 cases)
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
def funcGetDistanceFromAverage(self, abndTable, ldAverage, lsSamples, lfSelected):
    """
    Measure how far each selected sample is from an average sample.

    A sample is measured only when its flag in lfSelected is true; the flags are
    matched to lsSamples by position. Each distance is the first element of the
    result of Metric.funcGetBrayCurtisDissimilarity for the pair (sample, average).

    :param abndTable: Abundance table holding the data to be analyzed.
    :type: AbundanceTable
    :param ldAverage: Average population (Average features of the abundance table of samples)
    :type: List of doubles which represent the average population
    :param lsSamples: Sample ids, in the same order as the flags in lfSelected
    :type: List of strings (sample ids)
    :param lfSelected: Samples to be included in the analysis
    :type: List of boolean (true means include)
    :return: List of distances (doubles), one per selected sample, in sample order
    """
    # Resolve the flags to sample names first
    lsChosen = [sName for iPosition, sName in enumerate(lsSamples)
                if lfSelected[iPosition]]

    # Pair every chosen sample with the average and keep the resulting distance
    return [Metric.funcGetBrayCurtisDissimilarity(
                np.array([abndTable.funcGetSample(sName), ldAverage]))[0]
            for sName in lsChosen]
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
451
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
452 # Happy path tested (1 case)
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
def funcMeasureDistanceFromLabelToAverageOtherLabel(self, abndTable, lfGroupOfInterest, lfGroupOther):
    """
    Get the distance of samples from one label from the average sample of not the label.
    Note: This assumes 2 classes.

    :param abndTable: Table of data to work out of.
    :type: AbundanceTable
    :param lfGroupOfInterest: Boolean indicator of the sample being in the first group.
    :type: List of booleans, true indicating an individual in the group of interest.
    :param lfGroupOther: Boolean indicator of the sample being in the other group.
    :type: List of booleans, true indicating an individual in the other group.
    :return List of tuples: (string sample name, double distance) for every sample in the group of interest,
        in the order of the samples in the abundance table.
    """
    # Get all sample names
    lsAllSamples = abndTable.funcGetSampleNames()

    # Get average populations
    lAverageOther = self.funcGetAveragePopulation(
        abndTable=abndTable, lfCompress=lfGroupOther)

    # Get the distance from the average of the other label (label 1)
    ldSelectedDistances = self.funcGetDistanceFromAverage(abndTable=abndTable, ldAverage=lAverageOther,
                                                          lsSamples=lsAllSamples, lfSelected=lfGroupOfInterest)

    lsSamplesOfInterest = [lsAllSamples[iindex]
                           for iindex, fGroup in enumerate(lfGroupOfInterest) if fGroup]

    # Under Python 3 zip() is a one-shot iterator; materialize it so the result
    # is a real list that can be indexed, measured and iterated more than once.
    return list(zip(lsSamplesOfInterest, ldSelectedDistances))
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
478
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
479 # Happy path tested (1 test case)
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
def funcPerformDistanceSelection(self, abndTable, iSelectionCount, sLabel, sValueOfInterest):
    """
    Given metadata, metadata of one value (sValueOfInterest) is measured from the average (centroid) value of another label group.
    An iSelectionCount of samples is selected from the group of interest closest to and furthest from the centroid of the other group.
    When there are several other groups, a sample's distance is the mean of its distances to each other group's centroid.

    :params abndTable: Abundance of measurements
    :type: AbundanceTable
    :params iSelectionCount: The number of samples selected at each extreme. Values of 0 or less select nothing.
    :type: Integer Integer greater than 0
    :params sLabel: ID of the metadata which is the supervised label
    :type: String
    :params sValueOfInterest: Metadata value in the sLabel metadata row of the abundance table which defines the group of interest.
    :type: String found in the abundance table metadata row indicated by sLabel.
    :return list list of tuples (samplename, distance) [[iSelectionCount of tuples closest to the other centroid], [iSelectionCount of tuples farthest from the other centroid], [all tuples of samples not selected]]
    """

    lsMetadata = abndTable.funcGetMetadata(sLabel)

    # Other metadata values. The value of interest is removed as one whole element;
    # set(sValueOfInterest) would split a string into characters and so fail to
    # remove any multi-character label.
    lsUniqueOtherValues = list(set(lsMetadata) - {sValueOfInterest})

    # Get boolean indicator of values of interest
    lfLabelsInterested = [sValueOfInterest == sValue for sValue in lsMetadata]

    # Sum, per sample of interest, the distances to the centroid of each other group
    dictDistanceSums = {}
    for sOtherLabel in lsUniqueOtherValues:
        # Get boolean indicator of labels not of interest
        lfLabelsOther = [sOtherLabel == sValue for sValue in lsMetadata]

        # Get the distances of data from two different groups to the average of the other
        ldValueDistances = dict(self.funcMeasureDistanceFromLabelToAverageOtherLabel(
            abndTable, lfLabelsInterested, lfLabelsOther))

        for sKey, dDistance in ldValueDistances.items():
            dictDistanceSums[sKey] = dictDistanceSums.get(sKey, 0.0) + dDistance

    # Finish average by dividing by length of lsUniqueOtherValues, then sort to extract extremes
    dOtherGroupCount = float(len(lsUniqueOtherValues))
    ltpleAverageDistances = sorted(
        ((sKey, dSum/dOtherGroupCount) for sKey, dSum in dictDistanceSums.items()),
        key=operator.itemgetter(1))

    # Get the closest and farthest distances.
    # A count of 0 must select nothing: the slice [-0:] would return the whole list.
    iCount = max(iSelectionCount, 0)
    ltupleDiscriminantSamples = ltpleAverageDistances[:iCount]
    ltupleDistinctSamples = ltpleAverageDistances[-iCount:] if iCount else []

    # Remove the selected samples from the larger population of distances (better visualization)
    setSelected = {tpleSelected[0]
                   for tpleSelected in ltupleDiscriminantSamples + ltupleDistinctSamples}

    # Return discriminant tuples, distinct tuples, other tuples
    return [ltupleDiscriminantSamples, ltupleDistinctSamples,
            [tplData for tplData in ltpleAverageDistances if tplData[0] not in setSelected]]
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
537
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
538 # Run the supervised method surrounding distance from centroids
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
539 # Happy path tested (3 test cases)
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
540 def funcRunSupervisedDistancesFromCentroids(self, abundanceTable, fRunDistinct, fRunDiscriminant,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
541 xOutputSupFile, xPredictSupFile, strSupervisedMetadata,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
542 iSampleSupSelectionCount, lsOriginalSampleNames, lsOriginalLabels, fAppendFiles=False):
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
543 """
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
544 Runs supervised methods based on measuring distances of one label from the centroid of another. NAs are evaluated as their own group.
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
545
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
546 :param abundanceTable: AbundanceTable
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
547 :type: AbundanceTable Data to analyze
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
548 :param fRunDistinct: Run distinct selection method
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
549 :type: Boolean boolean (true runs method)
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
550 :param fRunDiscriminant: Run discriminant method
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
551 :type: Boolean boolean (true runs method)
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
552 :param xOutputSupFile: File output from supervised methods detailing data going into the method.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
553 :type: String or FileStream
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
554 :param xPredictSupFile: File output from supervised methods distance results from supervised methods.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
555 :type: String or FileStream
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
556 :param strSupervisedMetadata: The metadata that will be used to group samples.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
557 :type: String
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
558 :param iSampleSupSelectionCount: Number of samples to select
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
559 :type: Integer int sample selection count
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
560 :param lsOriginalSampleNames: List of the sample names, order is important and should be preserved from the abundanceTable.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
561 :type: List of samples
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
562 :param fAppendFiles: Indicates that output files already exist and appending is occurring.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
563 :type: Boolean
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
564 :return Selected Samples: A dictionary of selected samples by selection ID
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
565 Dictionary {"Selection Method":["SampleID","SampleID"...]}
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
566 """
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
567 # Get labels and run one label against many
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
568 lstrMetadata = abundanceTable.funcGetMetadata(strSupervisedMetadata)
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
569 dictlltpleDistanceMeasurements = {}
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
570 for sMetadataValue in set(lstrMetadata):
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
571
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
572 # For now perform the selection here for the label of interest against the other labels
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
573 dictlltpleDistanceMeasurements.setdefault(sMetadataValue, []).extend(self.funcPerformDistanceSelection(abndTable=abundanceTable,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
574 iSelectionCount=iSampleSupSelectionCount, sLabel=strSupervisedMetadata, sValueOfInterest=sMetadataValue))
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
575
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
576 # Make expected output files for supervised methods
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
577 # 1. Output file which is similar to an input file for SVMs
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
578 # 2. Output file that is similar to the probabilistic output of a SVM (LibSVM)
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
579 # Mainly for making output of supervised methods (Distance from Centroid) similar
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
580 # MicropitaVis needs some of these files
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
581 if xOutputSupFile:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
582 if fAppendFiles:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
583 SVM.funcUpdateSVMFileWithAbundanceTable(abndAbundanceTable=abundanceTable, xOutputSVMFile=xOutputSupFile,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
584 lsOriginalLabels=lsOriginalLabels, lsSampleOrdering=lsOriginalSampleNames)
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
585 else:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
586 SVM.funcConvertAbundanceTableToSVMFile(abndAbundanceTable=abundanceTable, xOutputSVMFile=xOutputSupFile,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
587 sMetadataLabel=strSupervisedMetadata, lsOriginalLabels=lsOriginalLabels, lsSampleOrdering=lsOriginalSampleNames)
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
588
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
589 # Will contain the samples selected to return
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
590 # One or more of the methods may be active so this is why I am extending instead of just returning the result of each method type
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
591 dictSelectedSamplesRet = dict()
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
592 for sKey, ltplDistances in dictlltpleDistanceMeasurements.items():
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
593 if fRunDistinct:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
594 dictSelectedSamplesRet.setdefault(ConstantsMicropita.c_strDistinct, []).extend([
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
595 ltple[0] for ltple in ltplDistances[1]])
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
596 if fRunDiscriminant:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
597 dictSelectedSamplesRet.setdefault(ConstantsMicropita.c_strDiscriminant, []).extend([
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
598 ltple[0] for ltple in ltplDistances[0]])
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
599
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
600 if xPredictSupFile:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
601 dictFlattenedDistances = dict()
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
602 [dictFlattenedDistances.setdefault(sKey, []).append(tple)
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
603 for sKey, lltple in dictlltpleDistanceMeasurements.items()
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
604 for ltple in lltple for tple in ltple]
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
605 if fAppendFiles:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
606 self._updatePredictFile(xPredictSupFile=xPredictSupFile, xInputLabelsFile=xOutputSupFile,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
607 dictltpleDistanceMeasurements=dictFlattenedDistances, abundanceTable=abundanceTable, lsOriginalSampleNames=lsOriginalSampleNames)
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
608 else:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
609 self._writeToPredictFile(xPredictSupFile=xPredictSupFile, xInputLabelsFile=xOutputSupFile,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
610 dictltpleDistanceMeasurements=dictFlattenedDistances, abundanceTable=abundanceTable, lsOriginalSampleNames=lsOriginalSampleNames)
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
611 return dictSelectedSamplesRet
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
612
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
613 # Two happy path test cases
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
def _updatePredictFile(self, xPredictSupFile, xInputLabelsFile, dictltpleDistanceMeasurements, abundanceTable, lsOriginalSampleNames):
    """
    Manages updating the predict file.

    Reads the distances already stored in the predict file, merges them with the
    measurements passed in and rewrites the file through _writeToPredictFile
    (called with fFromUpdate=True).

    Merge rules:
    - A label group present in both: the passed-in measurements are kept and the
      stored entries are added only for samples the passed-in data does not cover.
    - A label group present in only one of the two: its data is kept as is.

    :param xPredictSupFile: File that has predictions (distances) from the supervised method.
                            If a stream is given it is closed and reopened by its name.
    :type: FileStream or String file path
    :param xInputLabelsFile: File that was input to the supervised methods (passed on to the writer).
    :type: FileStream or String file path
    :param dictltpleDistanceMeasurements: New distance measurements to merge into the file.
    :type: Dictionary of lists of tuples {"labelgroup":[("SampleName",dDistance)...], "labelgroup":[("SampleName",dDistance)...]}
    :param abundanceTable: Abundance table, passed on unchanged to the writer.
    :type: AbundanceTable
    :param lsOriginalSampleNames: Sample names; the i-th data row of the existing file is
                                  attributed to the i-th name in this list.
    :type: List of strings
    """

    # The file is reopened by path below, so a stream handed in is closed first.
    if not isinstance(xPredictSupFile, str):
        xPredictSupFile.close()
        xPredictSupFile = xPredictSupFile.name

    # Read the stored predictions and release the handle before the writer
    # reopens (and truncates) the same path.
    with open(xPredictSupFile, 'r', newline='') as csvr:
        f = csv.reader(
            csvr, delimiter=ConstantsBreadCrumbs.c_strBreadCrumbsSVMSpace)
        # Python 3 readers have no .next() method; the default also lets an
        # empty file be treated as "no stored data" instead of raising.
        lsHeader = next(f, [])[1:]
        dictlltpleRead = dict((sHeader, []) for sHeader in lsHeader)

        # Read data in; the first column (label) is not needed here.
        # Values read from the file stay strings.
        for iSampleIndex, lsRow in enumerate(f):
            for iDistanceIndex, dDistance in enumerate(lsRow[1:]):
                if dDistance != ConstantsMicropita.c_sEmptyPredictFileValue:
                    dictlltpleRead[lsHeader[iDistanceIndex]].append(
                        (lsOriginalSampleNames[iSampleIndex], dDistance))

    # Combine dictltpleDistanceMeasurements with the stored data
    # If they share a key then merge keeping parameter data
    # If they do not share the key, keep the full data
    dictNew = {}
    for sKey, ltpleNew in dictltpleDistanceMeasurements.items():
        ltpleMerged = list(ltpleNew)
        if sKey in dictlltpleRead:
            setNewSamples = set(tple[0] for tple in ltpleNew)
            ltpleMerged.extend(tple for tple in dictlltpleRead[sKey]
                               if tple[0] not in setNewSamples)
        dictNew[sKey] = ltpleMerged
    for sKey, ltpleRead in dictlltpleRead.items():
        if sKey not in dictltpleDistanceMeasurements:
            dictNew[sKey] = ltpleRead

    # Call writer
    self._writeToPredictFile(xPredictSupFile=xPredictSupFile, xInputLabelsFile=xInputLabelsFile,
                             dictltpleDistanceMeasurements=dictNew, abundanceTable=abundanceTable,
                             lsOriginalSampleNames=lsOriginalSampleNames, fFromUpdate=True)
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
661
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
662 # 2 happy path test cases
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
def _writeToPredictFile(self, xPredictSupFile, xInputLabelsFile, dictltpleDistanceMeasurements, abundanceTable, lsOriginalSampleNames, fFromUpdate=False):
    """
    Write to the predict file.

    Writes a header row ("labels" followed by the label groups) and then one row
    per sample in lsOriginalSampleNames holding the sample's label and its distance
    for every label group. Missing labels or distances are written as
    ConstantsMicropita.c_sEmptyPredictFileValue.

    :param xPredictSupFile: File that has predictions (distances) from the supervised method.
                            A path is opened for writing and closed when done; a stream is
                            written to as given and left open for the caller.
    :type: FileStream or String file path
    :param xInputLabelsFile: File that was input to the supervised methods; labels are read from it.
    :type: FileStream or String file path
    :param dictltpleDistanceMeasurements: Distances to write. Not modified by this method.
    :type: Dictionary of lists of tuples {"labelgroup":[("SampleName",dDistance)...], "labelgroup":[("SampleName",dDistance)...]}
    :param abundanceTable: An abundance table of the sample data.
    :type: AbundanceTable
    :param lsOriginalSampleNames: Sample names that define the rows (and row order) of the file.
                                  When fFromUpdate is True they are also used to read the labels,
                                  otherwise the sample names from the abundance table are used for that.
    :type: List of strings
    :param fFromUpdate: Indicates if this is part of an update to the file or not.
    :type: Boolean
    """

    xInputLabelsFileName = xInputLabelsFile
    if not isinstance(xInputLabelsFile, str):
        xInputLabelsFileName = xInputLabelsFile.name

    # Labels are read before the output is opened so that a failure here
    # does not leave a truncated predict file behind.
    lsAllSampleNames = abundanceTable.funcGetSampleNames()
    lsLabels = SVM.funcReadLabelsFromFile(xSVMFile=xInputLabelsFileName,
                                          lsAllSampleNames=lsOriginalSampleNames if fFromUpdate else lsAllSampleNames,
                                          isPredictFile=False)
    # lsLabels is used as {label: [samples]}; invert it to {sample: label}
    dictLabels = dict((sSample, sLabel)
                      for sLabel, lsSamples in lsLabels.items()
                      for sSample in lsSamples)

    # Dictionary keys will be used to order the predict file.
    # Materialized as a list: a Python 3 keys view cannot be concatenated to a list.
    lsMeasurementKeys = list(dictltpleDistanceMeasurements.keys())

    # Reformat into {labelgroup: {sample: distance}} for lookups, in a local copy
    # so the caller's dictionary is left untouched.
    dictSampleDistances = dict((sKey, dict(dictltpleDistanceMeasurements[sKey]))
                               for sKey in lsMeasurementKeys)

    # Only a file opened here is closed here; a stream passed in belongs to the caller.
    fOpenedHere = isinstance(xPredictSupFile, str)
    ostmPredict = open(xPredictSupFile, "w", newline="") if fOpenedHere else xPredictSupFile
    try:
        f = csv.writer(
            ostmPredict, delimiter=ConstantsBreadCrumbs.c_strBreadCrumbsSVMSpace)

        # Make header
        f.writerow(["labels"] + lsMeasurementKeys)

        # Make body of file
        for sSample in lsOriginalSampleNames:
            f.writerow([dictLabels.get(sSample, ConstantsMicropita.c_sEmptyPredictFileValue)] +
                       [str(dictSampleDistances[sKey].get(sSample, ConstantsMicropita.c_sEmptyPredictFileValue))
                        for sKey in lsMeasurementKeys])
    finally:
        if fOpenedHere:
            ostmPredict.close()
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
709
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
710 def _funcRunNormalizeSensitiveMethods(self, abndData, iSampleSelectionCount, dictSelectedSamples, lsAlphaMetrics, lsBetaMetrics, lsInverseBetaMetrics,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
711 fRunDiversity, fRunRepresentative, fRunExtreme, strAlphaMetadata=None,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
712 istmBetaMatrix=None, istrmTree=None, istrmEnvr=None, fInvertDiversity=False):
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
713 """
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
714 Manages running methods that are sensitive to normalization. This is called twice, once for the set of methods which should not be normalized and the other
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
715 for the set that should be normalized.
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
716
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
717 :param abndData: Abundance table object holding the samples to be measured.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
718 :type: AbundanceTable
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
719 :param iSampleSelectionCount The number of samples to select per method.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
720 :type: Integer
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
721 :param dictSelectedSamples Will be added to as samples are selected {"Method:["strSelectedSampleID","strSelectedSampleID"...]}.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
722 :type: Dictionary
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
723 :param lsAlphaMetrics: List of alpha metrics to use on alpha metric dependent assays (like highest diversity).
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
724 :type: List of strings
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
725 :param lsBetaMetrics: List of beta metrics to use on beta metric dependent assays (like most representative).
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
726 :type: List of strings
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
727 :param lsInverseBetaMetrics: List of inverse beta metrics to use on inverse beta metric dependent assays (like most dissimilar).
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
728 :type: List of strings
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
729 :param fRunDiversity: Run Diversity based methods (true indicates run).
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
730 :type: Boolean
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
731 :param fRunRepresentative: Run Representative based methods (true indicates run).
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
732 :type: Boolean
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
733 :param fRunExtreme: Run Extreme based methods (true indicates run).
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
734 :type: Boolean
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
735 :param istmBetaMatrix: File that has a precalculated beta matrix
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
736 :type: File stream or File path string
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
737 :return Selected Samples: Samples selected by methods.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
738 Dictionary {"Selection Method":["SampleID","SampleID","SampleID",...]}
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
739 """
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
740
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
741 # Sample ids/names
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
742 lsSampleNames = abndData.funcGetSampleNames()
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
743
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
744 # Generate alpha metrics and get most diverse
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
745 if fRunDiversity:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
746
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
747 # Get Alpha metrics matrix
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
748 internalAlphaMatrix = None
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
749 # Name of technique
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
750 strMethod = [
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
751 strAlphaMetadata] if strAlphaMetadata else lsAlphaMetrics
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
752
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
753 # If given an alpha-diversity metadata
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
754 if strAlphaMetadata:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
755 internalAlphaMatrix = [
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
756 [float(strNum) for strNum in abndData.funcGetMetadata(strAlphaMetadata)]]
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
757 else:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
758 # Expects Observations (Taxa (row) x sample (column))
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
759 #Returns [[metric1-sample1, metric1-sample2, metric1-sample3],[metric1-sample1, metric1-sample2, metric1-sample3]]
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
760 internalAlphaMatrix = Metric.funcBuildAlphaMetricsMatrix(npaSampleAbundance=abndData.funcGetAbundanceCopy()
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
761 if not abndData.funcIsSummed()
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
762 else abndData.funcGetFeatureAbundanceTable(abndData.funcGetTerminalNodes()).funcGetAbundanceCopy(),
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
763 lsSampleNames=lsSampleNames, lsDiversityMetricAlpha=lsAlphaMetrics)
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
764
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
765 if internalAlphaMatrix:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
766 # Invert measurements
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
767 if fInvertDiversity:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
768 lldNewDiversity = []
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
769 for lsLine in internalAlphaMatrix:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
770 lldNewDiversity.append(
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
771 [1/max(dValue, ConstantsMicropita.c_smallNumber) for dValue in lsLine])
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
772 internalAlphaMatrix = lldNewDiversity
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
773 # Get top ranked alpha diversity by most diverse
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
774 # Expects [[sample1,sample2,sample3...],[sample1,sample2,sample3..],...]
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
775 #Returns [[sampleName1, sampleName2, sampleNameN],[sampleName1, sampleName2, sampleNameN]]
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
776 mostDiverseAlphaSamplesIndexes = self.funcGetTopRankedSamples(
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
777 lldMatrix=internalAlphaMatrix, lsSampleNames=lsSampleNames, iTopAmount=iSampleSelectionCount)
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
778
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
779 # Add to results
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
780 for index in xrange(0, len(strMethod)):
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
781 strSelectionMethod = self.dictConvertAMetricDiversity.get(
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
782 strMethod[index], ConstantsMicropita.c_strDiversity+"="+strMethod[index])
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
783 dictSelectedSamples.setdefault(strSelectionMethod, []).extend(
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
784 mostDiverseAlphaSamplesIndexes[index])
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
785
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
786 logging.info(
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
787 "MicroPITA.funcRunNormalizeSensitiveMethods:: Selected Samples 1b")
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
788 logging.info(dictSelectedSamples)
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
789
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
790 # Generate beta metrics and run the representative and extreme selections
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
791 if fRunRepresentative or fRunExtreme:
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
792
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
793 # Abundance matrix transposed
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
794 npaTransposedAbundance = UtilityMath.funcTransposeDataMatrix(
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
795 abndData.funcGetAbundanceCopy(), fRemoveAdornments=True)
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
796
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
797 # Get center selection using clusters/tiling
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
798 # This will be for beta metrics in normalized space
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
799 if fRunRepresentative:
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
800
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
801 if istmBetaMatrix:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
802 # Get representative dissimilarity samples
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
803 medoidSamples = self.funcGetCentralSamplesByKMedoids(npaMatrix=npaTransposedAbundance, sMetric=ConstantsMicropita.c_custom, lsSampleNames=lsSampleNames,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
804 iNumberSamplesReturned=iSampleSelectionCount, istmBetaMatrix=istmBetaMatrix, istrmTree=istrmTree, istrmEnvr=istrmEnvr)
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
805
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
806 if medoidSamples:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
807 dictSelectedSamples.setdefault(
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
808 ConstantsMicropita.c_strRepresentative+"="+ConstantsMicropita.c_custom, []).extend(medoidSamples)
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
809 else:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
810 logging.info(
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
811 "MicroPITA.funcRunNormalizeSensitiveMethods:: Performing representative selection on normalized data.")
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
812 for bMetric in lsBetaMetrics:
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
813
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
814 # Get representative dissimilarity samples
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
815 medoidSamples = self.funcGetCentralSamplesByKMedoids(npaMatrix=npaTransposedAbundance, sMetric=bMetric, lsSampleNames=lsSampleNames,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
816 iNumberSamplesReturned=iSampleSelectionCount, istmBetaMatrix=istmBetaMatrix, istrmTree=istrmTree, istrmEnvr=istrmEnvr)
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
817
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
818 if medoidSamples:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
819 dictSelectedSamples.setdefault(self.dictConvertBMetricToMethod.get(
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
820 bMetric, ConstantsMicropita.c_strRepresentative+"="+bMetric), []).extend(medoidSamples)
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
821
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
822 # Get extreme selection using clusters, tiling
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
823 if fRunExtreme:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
824 logging.info(
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
825 "MicroPITA.funcRunNormalizeSensitiveMethods:: Performing extreme selection on normalized data.")
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
826 if istmBetaMatrix:
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
827
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
828 # Samples for representative dissimilarity
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
829 # This involves inverting the distance metric,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
830 # Taking the dendrogram level where the number of clusters == the number of samples to select
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
831 # Returning a representative sample from each cluster
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
832 extremeSamples = self.funcSelectExtremeSamplesFromHClust(strBetaMetric=ConstantsMicropita.c_custom, npaAbundanceMatrix=npaTransposedAbundance,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
833 lsSampleNames=lsSampleNames, iSelectSampleCount=iSampleSelectionCount, istmBetaMatrix=istmBetaMatrix, istrmTree=istrmTree, istrmEnvr=istrmEnvr)
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
834
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
835 # Add selected samples
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
836 if extremeSamples:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
837 dictSelectedSamples.setdefault(
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
838 ConstantsMicropita.c_strExtreme+"="+ConstantsMicropita.c_custom, []).extend(extremeSamples)
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
839
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
840 else:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
841 # Select extreme samples with funcSelectExtremeSamplesFromHClust for each inverse beta metric in normalized space
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
842 for bMetric in lsInverseBetaMetrics:
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
843
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
844 # Samples for representative dissimilarity
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
845 # This involves inverting the distance metric,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
846 # Taking the dendrogram level where the number of clusters == the number of samples to select
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
847 # Returning a representative sample from each cluster
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
848 extremeSamples = self.funcSelectExtremeSamplesFromHClust(strBetaMetric=bMetric, npaAbundanceMatrix=npaTransposedAbundance, lsSampleNames=lsSampleNames,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
849 iSelectSampleCount=iSampleSelectionCount, istmBetaMatrix=istmBetaMatrix, istrmTree=istrmTree, istrmEnvr=istrmEnvr)
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
850
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
851 # Add selected samples
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
852 if extremeSamples:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
853 dictSelectedSamples.setdefault(self.dictConvertInvBMetricToMethod.get(
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
854 bMetric, ConstantsMicropita.c_strExtreme+"="+bMetric), []).extend(extremeSamples)
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
855
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
856 logging.info(
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
857 "MicroPITA.funcRunNormalizeSensitiveMethods:: Selected Samples 2,3b")
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
858 logging.info(dictSelectedSamples)
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
859 return dictSelectedSamples
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
860
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
861 def funcRun(self, strIDName, strLastMetadataName, istmInput,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
862 ostmInputPredictFile, ostmPredictFile, ostmCheckedFile, ostmOutput,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
863 cDelimiter, cFeatureNameDelimiter, strFeatureSelection,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
864 istmFeatures, iCount, lstrMethods, strLastRowMetadata=None, strLabel=None, strStratify=None,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
865 strCustomAlpha=None, strCustomBeta=None, strAlphaMetadata=None, istmBetaMatrix=None, istrmTree=None, istrmEnvr=None,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
866 iMinSeqs=ConstantsMicropita.c_liOccurenceFilter[0], iMinSamples=ConstantsMicropita.c_liOccurenceFilter[1], fInvertDiversity=False):
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
867 """
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
868 Manages the selection of samples given different metrics.
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
869
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
870 :param strIDName: Sample Id metadata row
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
871 :type: String
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
872 :param strLastMetadataName: The id of the metadata positioned last in the abundance table.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
873 :type: String metadata id.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
874 :param istmInput: Input abundance table file; it is read with AbundanceTable.funcMakeFromFile.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
875 :type: FileStream or String file path
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
876 :param ostmInputPredictFile: File to store distances from supervised methods.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
877 :type: FileStream or String file path
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
878 :param ostmCheckedFile: File to store the AbundanceTable data after it is being checked.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
879 :type: FileStream or String file path
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
880 :param ostmOutput: File to store sample selection by methods of interest.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
881 :type: FileStream or String file path
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
882 :param cDelimiter: Delimiter of abundance table.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
883 :type: Character (default TAB).
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
884 :param cFeatureNameDelimiter: Delimiter of the name of features (for instance if they contain consensus lineages indicating clades).
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
885 :type: Character (default |).
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
886 :param strFeatureSelection: Which method to use to select features in a targeted manner (Using average ranked abundance or average abundance).
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
887 :type: String (specific values indicated in ConstantsMicropita.lsTargetedFeatureMethodValues).
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
888 :param istmFeatures: File which holds the features of interest if using targeted feature methodology.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
889 :type: FileStream or String file path
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
890 :param iCount: Number of samples to select in each methods, supervised methods select this amount per label if possible.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
891 :type: Integer
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
892 :param lstrMethods: List of strings indicating selection techniques.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
893 :type: List of string method names
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
894 :param strLabel: The metadata used for supervised labels.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
895 :type: String
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
896 :param strStratify: The metadata used to stratify unsupervised data.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
897 :type: String
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
898 :param strCustomAlpha: Custom alpha diversity metric
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
899 :type: String
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
900 :param strCustomBeta: Custom beta diversity metric
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
901 :type: String
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
902 :param strAlphaMetadata: Metadata id which is a diversity metric to use in highest diversity sampling
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
903 :type: String
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
904 :param istmBetaMatrix: File containing precalculated beta-diversity matrix for representative sampling
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
905 :type: FileStream or String file path
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
906 :param istrmTree: File containing tree for phylogenetic beta-diversity analysis
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
907 :type: FileStream or String file path
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
908 :param istrmEnvr: File containing environment for phylogenetic beta-diversity analysis
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
909 :type: FileStream or String file path
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
910 :param iMinSeqs: Minimum sequence count for the occurrence filter, which removes every feature that does not have a minimum number of sequences in each of a minimum number of samples.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
911 :type: Integer
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
912 :param iMinSamples: Minimum sample count for the occurrence filter.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
913 :type: Integer
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
914 :param fInvertDiversity: When true will invert diversity measurements before using.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
915 :type: boolean
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
916 :return Selected Samples: Samples selected by methods.
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
917 Dictionary {"Selection Method":["SampleID","SampleID","SampleID",...]}
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
918 """
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
919
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
920 # Holds the top ranked samples from different metrics
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
921 # dict[metric name] = [samplename,samplename...]
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
922 selectedSamples = dict()
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
923
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
924 # If targeted feature selection is among the selection methods, a feature file must be given; if it is missing, log an error and do not run
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
925 if ConstantsMicropita.c_strFeature in lstrMethods:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
926 if not istmFeatures:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
927 logging.error(
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
928 "MicroPITA.funcRun:: Did not receive both the Targeted feature file and the feature selection method. MicroPITA did not run.")
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
929 return False
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
930
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
931 # Diversity metrics to run
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
932 # Use custom metrics if specified
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
933 # Custom beta metrics set to normalized only, custom alpha metrics set to count only
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
934 diversityMetricsAlpha = [] if strCustomAlpha or strAlphaMetadata else [
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
935 MicroPITA.c_strInverseSimpsonDiversity]
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
936 diversityMetricsBeta = [] if istmBetaMatrix else [
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
937 strCustomBeta] if strCustomBeta else [MicroPITA.c_strBrayCurtisDissimilarity]
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
938 # inverseDiversityMetricsBeta = [MicroPITA.c_strInvBrayCurtisDissimilarity]
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
939 diversityMetricsAlphaNoNormalize = [strAlphaMetadata] if strAlphaMetadata else [
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
940 strCustomAlpha] if strCustomAlpha else []
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
941 diversityMetricsBetaNoNormalize = []
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
942 # inverseDiversityMetricsBetaNoNormalize = []
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
943
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
944 # Targeted taxa
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
945 userDefinedTaxa = []
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
946
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
947 # Flags indicating which selection flows to perform
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
948 c_RUN_MAX_DIVERSITY_1 = ConstantsMicropita.c_strDiversity in lstrMethods
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
949 c_RUN_REPRESENTIVE_DISSIMILARITY_2 = ConstantsMicropita.c_strRepresentative in lstrMethods
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
950 c_RUN_MAX_DISSIMILARITY_3 = ConstantsMicropita.c_strExtreme in lstrMethods
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
951 c_RUN_RANK_AVERAGE_USER_4 = False
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
952 if ConstantsMicropita.c_strFeature in lstrMethods:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
953 c_RUN_RANK_AVERAGE_USER_4 = True
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
954 if not istmFeatures:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
955 logging.error(
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
956 "MicroPITA.funcRun:: No taxa file was given for taxa selection.")
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
957 return False
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
958 # Read in taxa list, break down to lines and filter out empty strings
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
959 userDefinedTaxa = filter(None, (s.strip()
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
960 for s in istmFeatures.readlines()))
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
961 c_RUN_RANDOM_5 = ConstantsMicropita.c_strRandom in lstrMethods
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
962 c_RUN_DISTINCT = ConstantsMicropita.c_strDistinct in lstrMethods
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
963 c_RUN_DISCRIMINANT = ConstantsMicropita.c_strDiscriminant in lstrMethods
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
964
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
965 # Read in abundance data
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
966 # Abundance is a structured array. Samples (column) by Taxa (rows) with the taxa id row included as the column index=0
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
967 # Abundance table object to read in and manage data
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
968 totalAbundanceTable = AbundanceTable.funcMakeFromFile(xInputFile=istmInput, lOccurenceFilter=[iMinSeqs, iMinSamples],
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
969 cDelimiter=cDelimiter, sMetadataID=strIDName, sLastMetadataRow=strLastRowMetadata,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
970 sLastMetadata=strLastMetadataName, cFeatureNameDelimiter=cFeatureNameDelimiter, xOutputFile=ostmCheckedFile)
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
971 if not totalAbundanceTable:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
972 logging.error("MicroPITA.funcRun:: Could not read in the abundance table. Analysis was not performed." +
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
973 " This often occurs when the Last Metadata is not specified correctly." +
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
974 " Please check to make sure the Last Metadata selection is the row of the last metadata," +
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
975 " all values after this selection should be microbial measurements and should be numeric.")
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
976 return False
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
977
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
978 lsOriginalLabels = SVM.funcMakeLabels(
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
979 totalAbundanceTable.funcGetMetadata(strLabel)) if strLabel else strLabel
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
980
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
981 dictTotalMetadata = totalAbundanceTable.funcGetMetadataCopy()
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
982 logging.debug("MicroPITA.funcRun:: Received metadata=" +
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
983 str(dictTotalMetadata))
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
984 # If there is only 1 unique value for the labels, do not run the Supervised methods
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
985 if strLabel and (len(set(dictTotalMetadata.get(strLabel, []))) < 2):
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
986 logging.error("The label " + strLabel + " did not have 2 or more values. Labels found=" +
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
987 str(dictTotalMetadata.get(strLabel, [])))
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
988 return False
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
989
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
990 #Run unsupervised methods###
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
991 # Stratify the data if need be and drop the old data
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
992 lStratifiedAbundanceTables = totalAbundanceTable.funcStratifyByMetadata(
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
993 strStratify) if strStratify else [totalAbundanceTable]
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
994
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
995 # For each stratified abundance block or for the unstratfified abundance
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
996 # Run the unsupervised blocks
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
997 fAppendSupFiles = False
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
998 for stratAbundanceTable in lStratifiedAbundanceTables:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
999 logging.info("MicroPITA.funcRun:: Running abundance block:" +
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1000 stratAbundanceTable.funcGetName())
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1001
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1002 # NOT SUMMED, NOT NORMALIZED
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1003 # Only perform if the data is not yet normalized
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1004 if not stratAbundanceTable.funcIsNormalized():
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1005 # Need to first work with unnormalized data
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1006 if c_RUN_MAX_DIVERSITY_1 or c_RUN_REPRESENTIVE_DISSIMILARITY_2 or c_RUN_MAX_DISSIMILARITY_3:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1007
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1008 self._funcRunNormalizeSensitiveMethods(abndData=stratAbundanceTable, iSampleSelectionCount=iCount,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1009 dictSelectedSamples=selectedSamples, lsAlphaMetrics=diversityMetricsAlphaNoNormalize,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1010 lsBetaMetrics=diversityMetricsBetaNoNormalize,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1011 lsInverseBetaMetrics=diversityMetricsBetaNoNormalize,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1012 fRunDiversity=c_RUN_MAX_DIVERSITY_1, fRunRepresentative=c_RUN_REPRESENTIVE_DISSIMILARITY_2,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1013 fRunExtreme=c_RUN_MAX_DISSIMILARITY_3, strAlphaMetadata=strAlphaMetadata,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1014 istrmTree=istrmTree, istrmEnvr=istrmEnvr, fInvertDiversity=fInvertDiversity)
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1015
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1016 # Generate selection by the rank average of user defined taxa
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1017 # Expects (Taxa (row) by Samples (column))
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1018 # Expects a column 0 of taxa id that is skipped
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1019 # Returns [(sample name,average,rank)]
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1020 # SUMMED AND NORMALIZED
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1021 stratAbundanceTable.funcSumClades()
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1022 # Normalize data at this point
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1023 stratAbundanceTable.funcNormalize()
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1024 if c_RUN_RANK_AVERAGE_USER_4:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1025 selectedSamples[ConstantsMicropita.c_strFeature] = self.funcSelectTargetedTaxaSamples(abndMatrix=stratAbundanceTable,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1026 lsTargetedTaxa=userDefinedTaxa, iSampleSelectionCount=iCount, sMethod=strFeatureSelection)
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1027 logging.info("MicroPITA.funcRun:: Selected Samples Rank")
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1028 logging.info(selectedSamples)
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1029
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1030 # SUMMED AND NORMALIZED analysis block
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1031 # Diversity based metric will move reduce to terminal taxa as needed
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1032 if c_RUN_MAX_DIVERSITY_1 or c_RUN_REPRESENTIVE_DISSIMILARITY_2 or c_RUN_MAX_DISSIMILARITY_3:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1033
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1034 self._funcRunNormalizeSensitiveMethods(abndData=stratAbundanceTable, iSampleSelectionCount=iCount,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1035 dictSelectedSamples=selectedSamples, lsAlphaMetrics=diversityMetricsAlpha,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1036 lsBetaMetrics=diversityMetricsBeta,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1037 lsInverseBetaMetrics=diversityMetricsBeta,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1038 fRunDiversity=c_RUN_MAX_DIVERSITY_1, fRunRepresentative=c_RUN_REPRESENTIVE_DISSIMILARITY_2,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1039 fRunExtreme=c_RUN_MAX_DISSIMILARITY_3,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1040 istmBetaMatrix=istmBetaMatrix, istrmTree=istrmTree, istrmEnvr=istrmEnvr, fInvertDiversity=fInvertDiversity)
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1041
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1042 # 5::Select randomly
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1043 # Expects sampleNames = List of sample names [name, name, name...]
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1044 if(c_RUN_RANDOM_5):
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1045 # Select randomly from sample names
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1046 selectedSamples[ConstantsMicropita.c_strRandom] = self.funcGetRandomSamples(
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1047 lsSamples=stratAbundanceTable.funcGetSampleNames(), iNumberOfSamplesToReturn=iCount)
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1048 logging.info("MicroPITA.funcRun:: Selected Samples Random")
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1049 logging.info(selectedSamples)
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1050
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1051 # Perform supervised selection
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1052 if c_RUN_DISTINCT or c_RUN_DISCRIMINANT:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1053 if strLabel:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1054 dictSelectionRet = self.funcRunSupervisedDistancesFromCentroids(abundanceTable=stratAbundanceTable,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1055 fRunDistinct=c_RUN_DISTINCT, fRunDiscriminant=c_RUN_DISCRIMINANT,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1056 xOutputSupFile=ostmInputPredictFile, xPredictSupFile=ostmPredictFile,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1057 strSupervisedMetadata=strLabel, iSampleSupSelectionCount=iCount,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1058 lsOriginalSampleNames=totalAbundanceTable.funcGetSampleNames(),
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1059 lsOriginalLabels=lsOriginalLabels,
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1060 fAppendFiles=fAppendSupFiles)
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1061
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1062 [selectedSamples.setdefault(sKey, []).extend(
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1063 lValue) for sKey, lValue in dictSelectionRet.items()]
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1064
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1065 if not fAppendSupFiles:
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1066 fAppendSupFiles = True
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1067 logging.info(
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1068 "MicroPITA.funcRun:: Selected Samples Unsupervised")
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1069 logging.info(selectedSamples)
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1070 return selectedSamples
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1071
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1072 # Testing: Happy path tested
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1073 @staticmethod
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
def funcWriteSelectionToFile(dictSelection, xOutputFilePath):
    """
    Writes the selection of samples by method to an output file.

    One delimited row is written per selection method: the method name followed by
    the samples selected by that method. When dictSelection is empty nothing is
    written and no file is opened.

    :param dictSelection: The dictionary of selections by method to be written to a file.
    :type: Dictionary The dictionary of selections by method {"method":["sample selected","sample selected"...]}
    :param xOutputFilePath: FileStream or String path to file in which the dictionary is written.
                            A stream passed in by the caller is left open; a file opened
                            here from a path is closed before returning.
    :type: String FileStream or String path to file
    """

    if not dictSelection:
        return

    # Only a handle opened here is closed here; the caller owns any stream it passes in.
    fOpenedHere = isinstance(xOutputFilePath, str)
    # newline="" hands line-ending control to the csv module, as its documentation requires.
    ostmOutput = open(xOutputFilePath, "w", newline="") if fOpenedHere else xOutputFilePath
    try:
        csvWriter = csv.writer(
            ostmOutput, delimiter=ConstantsMicropita.c_outputFileDelim)

        # Create output content from dictionary
        for sKey in dictSelection:
            csvWriter.writerow([sKey]+dictSelection[sKey])
            logging.debug(
                "MicroPITA.funcRun:: Selected samples output to file:"+str(dictSelection[sKey]))
    finally:
        # Closing also flushes, so the rows reach disk even if a later row fails.
        if fOpenedHere:
            ostmOutput.close()
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1096
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1097 # Testing: Happy Path tested
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1098 @staticmethod
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
def funcReadSelectionFileToDictionary(xInputFile):
    """
    Reads in an output selection file from micropita and formats it into a dictionary.

    Each non-empty row is split on the micropita output delimiter; the first field is
    used as the key and the remaining fields as its list of values. Blank lines are
    skipped. If a key occurs on more than one row, the last row wins.

    :param xInputFile: String path to file or file stream to read and translate into a dictionary.
                       {"method":["sample selected","sample selected"...]}
                       A stream passed in by the caller is left open; a file opened
                       here from a path is closed before returning.
    :type: FileStream or String Path to file
    :return Dictionary: Samples selected by methods.
                        Dictionary {"Selection Method":["SampleID","SampleID","SampleID",...]}
    """

    # Only a handle opened here is closed here; the caller owns any stream it passes in.
    fOpenedHere = isinstance(xInputFile, str)
    # newline="" hands line-ending handling to the csv module, as its documentation requires.
    istmInput = open(xInputFile, "r", newline="") if fOpenedHere else xInputFile
    try:
        istmReader = csv.reader(
            istmInput, delimiter=ConstantsMicropita.c_outputFileDelim)

        # csv.reader yields an empty list for a blank line; skip those rows so
        # indexing the first field cannot fail.
        return {lsLine[0]: lsLine[1:] for lsLine in istmReader if lsLine}
    finally:
        if fOpenedHere:
            istmInput.close()
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1116
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1117
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
# Set up arguments reader
# The parser is built at module level; each add_argument_group call below starts a
# new section in the generated help, and `args` is rebound to the current group.
argp = argparse.ArgumentParser(prog="MicroPITA.py",
                               description="""Selects samples from abundance tables based on various selection schemes.""")

# Common options: how many samples to select and which selection methods to run.
args = argp.add_argument_group("Common", "Commonly modified options")
args.add_argument(ConstantsMicropita.c_strCountArgument, "--num", dest="iCount",
                  metavar="samples", default=10, type=int, help=ConstantsMicropita.c_strCountHelp)
# action="append" lets the method flag be repeated; each use adds one method to lstrMethods.
args.add_argument("-m", "--method", dest="lstrMethods", metavar="method", default=[], help=ConstantsMicropita.c_strSelectionTechniquesHelp,
                  choices=ConstantsMicropita.c_lsAllMethods, action="append")

# Custom options: user-chosen diversity metrics and externally supplied inputs.
args = argp.add_argument_group(
    "Custom", "Selecting and inputing custom metrics")
args.add_argument("-a", "--alpha", dest="strAlphaDiversity", metavar="AlphaDiversity", default=None,
                  help=ConstantsMicropita.c_strCustomAlphaDiversityHelp, choices=Metric.setAlphaDiversities)
# The beta choices are the Metric beta diversities plus the two UniFrac variants.
args.add_argument("-b", "--beta", dest="strBetaDiversity", metavar="BetaDiversity", default=None, help=ConstantsMicropita.c_strCustomBetaDiversityHelp,
                  choices=list(Metric.setBetaDiversities)+[Metric.c_strUnifracUnweighted, Metric.c_strUnifracWeighted])
args.add_argument("-q", "--alphameta", dest="strAlphaMetadata", metavar="AlphaDiversityMetadata",
                  default=None, help=ConstantsMicropita.c_strCustomAlphaDiversityMetadataHelp)
args.add_argument("-x", "--betamatrix", dest="istmBetaMatrix", metavar="BetaDiversityMatrix",
                  default=None, help=ConstantsMicropita.c_strCustomBetaDiversityMatrixHelp)
args.add_argument("-o", "--tree", dest="istrmTree", metavar="PhylogeneticTree",
                  default=None, help=ConstantsMicropita.c_strCustomPhylogeneticTreeHelp)
args.add_argument("-i", "--envr", dest="istrmEnvr", metavar="EnvironmentFile",
                  default=None, help=ConstantsMicropita.c_strCustomEnvironmentFileHelp)
args.add_argument("-f", "--invertDiversity", dest="fInvertDiversity", action="store_true",
                  default=False, help=ConstantsMicropita.c_strInvertDiversityHelp)

# Miscellaneous options: row/column identifiers and targeted-feature settings.
args = argp.add_argument_group(
    "Miscellaneous", "Row/column identifiers and feature targeting options")
args.add_argument("-d", ConstantsMicropita.c_strIDNameArgument, dest="strIDName",
                  metavar="sample_id", help=ConstantsMicropita.c_strIDNameHelp)
args.add_argument("-l", ConstantsMicropita.c_strLastMetadataNameArgument, dest="strLastMetadataName", metavar="metadata_id", default=None,
                  help=ConstantsMicropita.c_strLastMetadataNameHelp)
# The default targeting method is the first entry of the allowed values.
args.add_argument("-r", ConstantsMicropita.c_strTargetedFeatureMethodArgument, dest="strFeatureSelection", metavar="targeting_method", default=ConstantsMicropita.lsTargetedFeatureMethodValues[0],
                  choices=ConstantsMicropita.lsTargetedFeatureMethodValues, help=ConstantsMicropita.c_strTargetedFeatureMethodHelp)
# Plain "r" is used instead of "rU": the "U" flag was removed from open() in
# Python 3.11 and text mode already applies universal newline handling.
args.add_argument("-t", ConstantsMicropita.c_strTargetedSelectionFileArgument, dest="istmFeatures",
                  metavar="feature_file", type=argparse.FileType("r"), help=ConstantsMicropita.c_strTargetedSelectionFileHelp)
args.add_argument("-w", ConstantsMicropita.c_strFeatureMetadataArgument, dest="strLastFeatureMetadata",
                  metavar="Last_Feature_Metadata", default=None, help=ConstantsMicropita.c_strFeatureMetadataHelp)
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1157
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
# "Data labeling" option group: metadata IDs used for supervised labels and
# for stratifying unsupervised selection.
args = argp.add_argument_group(
    title="Data labeling",
    description="Metadata IDs for strata and supervised label values",
)
args.add_argument(
    "-e",
    ConstantsMicropita.c_strSupervisedLabelArgument,
    dest="strLabel",
    metavar="supervised_id",
    help=ConstantsMicropita.c_strSupervisedLabelHelp,
)
args.add_argument(
    "-s",
    ConstantsMicropita.c_strUnsupervisedStratifyMetadataArgument,
    dest="strUnsupervisedStratify",
    metavar="stratify_id",
    help=ConstantsMicropita.c_strUnsupervisedStratifyMetadataHelp,
)
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1164
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
# "File formatting" option group: delimiters used when reading the input.
args = argp.add_argument_group(
    title="File formatting",
    description="Rarely modified file formatting options",
)
# Column delimiter of the input file; tab by default.
args.add_argument(
    "-j",
    ConstantsMicropita.c_strFileDelimiterArgument,
    dest="cFileDelimiter",
    metavar="column_delimiter",
    default="\t",
    help=ConstantsMicropita.c_strFileDelimiterHelp,
)
# Delimiter inside feature (taxonomy) names; pipe by default.
args.add_argument(
    "-k",
    ConstantsMicropita.c_strFeatureNameDelimiterArgument,
    dest="cFeatureNameDelimiter",
    metavar="taxonomy_delimiter",
    default="|",
    help=ConstantsMicropita.c_strFeatureNameDelimiterHelp,
)
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1171
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
# "Debugging" option group: log verbosity and optional diagnostic output files.
args = argp.add_argument_group(
    title="Debugging",
    description="Debugging options - modify at your own risk!",
)
# Log level name; restricted to the configured choices, "WARNING" by default.
args.add_argument(
    "-v",
    ConstantsMicropita.c_strLoggingArgument,
    dest="strLogLevel",
    metavar="log_level",
    default="WARNING",
    choices=ConstantsMicropita.c_lsLoggingChoices,
    help=ConstantsMicropita.c_strLoggingHelp,
)
# Each of the following options is opened for writing by argparse when given.
args.add_argument(
    "-c",
    ConstantsMicropita.c_strCheckedAbundanceFileArgument,
    dest="ostmCheckedFile",
    metavar="output_qc",
    type=argparse.FileType("w"),
    help=ConstantsMicropita.c_strCheckedAbundanceFileHelp,
)
args.add_argument(
    "-g",
    ConstantsMicropita.c_strLoggingFileArgument,
    dest="ostmLoggingFile",
    metavar="output_log",
    type=argparse.FileType("w"),
    help=ConstantsMicropita.c_strLoggingFileHelp,
)
args.add_argument(
    "-u",
    ConstantsMicropita.c_strSupervisedInputFile,
    dest="ostmInputPredictFile",
    metavar="output_scaled",
    type=argparse.FileType("w"),
    help=ConstantsMicropita.c_strSupervisedInputFileHelp,
)
args.add_argument(
    "-p",
    ConstantsMicropita.c_strSupervisedPredictedFile,
    dest="ostmPredictFile",
    metavar="output_labels",
    type=argparse.FileType("w"),
    help=ConstantsMicropita.c_strSupervisedPredictedFileHelp,
)
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1184
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
# Positional arguments: the abundance input file followed by the output file.
# The input is opened with plain "r": the legacy "U" mode flag is rejected by
# open() since Python 3.11, and Python 3 text mode already translates newlines.
# NOTE(review): without nargs="?" both positionals are required, so the
# sys.stdin / sys.stdout defaults below are never applied by argparse.
# Confirm whether optional positionals were intended before changing this.
argp.add_argument("istmInput", metavar="input.pcl/biome", type=argparse.FileType("r"),
                  help=ConstantsMicropita.c_strAbundanceFileHelp,
                  default=sys.stdin)
argp.add_argument("ostmOutput", metavar="output.txt", type=argparse.FileType("w"),
                  help=ConstantsMicropita.c_strGenericOutputDataFileHelp,
                  default=sys.stdout)
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1189
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
# Prefix the module docstring with the argparse help text, laid out as a
# tab-indented literal block introduced by "::".
__doc__ = "".join((
    "::\n\n\t",
    argp.format_help().replace("\n", "\n\t"),
    __doc__,
))
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
1191
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1192
28
1d09ffab87a7 Uploaded MicroPITA.py - fixed spaces and tabs inconsistencies
george-weingart
parents: 16
diff changeset
def _main():
    """Command-line entry point for microPITA.

    Parses the command line with the module-level parser, configures
    logging, runs the sample selection through MicroPITA.funcRun and writes
    the result with MicroPITA.funcWriteSelectionToFile.

    Returns -1 when funcRun yields no selection; otherwise returns None
    after the selection has been written.
    """
    args = argp.parse_args()

    # Logging goes to the requested log file when given, otherwise to stderr.
    log_level = getattr(logging, args.strLogLevel.upper(), None)
    log_stream = args.ostmLoggingFile or sys.stderr
    logging.basicConfig(stream=log_stream, filemode='w', level=log_level)

    logging.info("MicroPITA:: Start microPITA")
    runner = MicroPITA()

    # Argparse appends to a default list rather than replacing it, so the
    # default selection method is filled in here when none was requested.
    if len(args.lstrMethods) == 0:
        args.lstrMethods = [ConstantsMicropita.c_strRepresentative]

    # Map the parsed command-line values onto funcRun's keyword arguments.
    run_options = dict(
        strIDName=args.strIDName,
        strLastMetadataName=args.strLastMetadataName,
        istmInput=args.istmInput,
        ostmInputPredictFile=args.ostmInputPredictFile,
        ostmPredictFile=args.ostmPredictFile,
        ostmCheckedFile=args.ostmCheckedFile,
        ostmOutput=args.ostmOutput,
        cDelimiter=args.cFileDelimiter,
        cFeatureNameDelimiter=args.cFeatureNameDelimiter,
        istmFeatures=args.istmFeatures,
        strFeatureSelection=args.strFeatureSelection,
        iCount=args.iCount,
        strLastRowMetadata=args.strLastFeatureMetadata,
        strLabel=args.strLabel,
        strStratify=args.strUnsupervisedStratify,
        strCustomAlpha=args.strAlphaDiversity,
        strCustomBeta=args.strBetaDiversity,
        strAlphaMetadata=args.strAlphaMetadata,
        istmBetaMatrix=args.istmBetaMatrix,
        istrmTree=args.istrmTree,
        istrmEnvr=args.istrmEnvr,
        lstrMethods=args.lstrMethods,
        fInvertDiversity=args.fInvertDiversity,
    )
    selection = runner.funcRun(**run_options)

    # An empty or missing result is treated as a failed analysis.
    if not selection:
        logging.error("MicroPITA:: Error, did not get a result from analysis.")
        return -1
    logging.info("End microPITA")

    # Dump the full selection at debug level.
    debug_message = "MicroPITA:: Returned the following samples:" + str(selection)
    logging.debug(debug_message)

    # Persist the selection to the output stream chosen on the command line.
    runner.funcWriteSelectionToFile(
        dictSelection=selection,
        xOutputFilePath=args.ostmOutput,
    )
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1247
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1248
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
if __name__ == "__main__":
    # Hand _main()'s return value to the interpreter as the exit status.
    # _main() returns -1 when the analysis produces no result; previously
    # that value was discarded and the process always exited with status 0.
    # A None return (the success path) still exits with status 0.
    sys.exit(_main())