Mercurial > repos > george-weingart > micropita
comparison src/breadcrumbs/scripts/scriptPcoa.py @ 0:d589875b8125
First version of micropita in this repository
| author | george-weingart |
|---|---|
| date | Wed, 30 Apr 2014 21:35:07 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d589875b8125 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """ | |
| 3 Author: Timothy Tickle | |
| 4 Description: Make PCoA of an abundance file | |
| 5 """ | |
| 6 | |
| 7 __author__ = "Timothy Tickle" | |
| 8 __copyright__ = "Copyright 2012" | |
| 9 __credits__ = ["Timothy Tickle"] | |
| 10 __license__ = "" | |
| 11 __version__ = "" | |
| 12 __maintainer__ = "Timothy Tickle" | |
| 13 __email__ = "ttickle@sph.harvard.edu" | |
| 14 __status__ = "Development" | |
| 15 | |
| 16 import sys | |
| 17 import argparse | |
| 18 from src.AbundanceTable import AbundanceTable | |
| 19 from src.Metric import Metric | |
| 20 import csv | |
| 21 import os | |
| 22 from src.PCoA import PCoA | |
| 23 | |
| 24 #Set up arguments reader | |
| 25 argp = argparse.ArgumentParser( prog = "scriptPcoa.py", | |
| 26 description = """PCoAs an abundance file given a metadata.\nExample:python scriptPcoa.py -i TID -l STSite""" ) | |
| 27 | |
| 28 #Arguments | |
| 29 #For table | |
| 30 argp.add_argument("-i","--id", dest="sIDName", default="ID", help="Abundance Table ID") | |
| 31 argp.add_argument("-l","--meta", dest="sLastMetadataName", help="Last metadata name") | |
| 32 argp.add_argument("-d","--fDelim", dest= "cFileDelimiter", action= "store", default="\t", help="File delimiter, default tab") | |
| 33 argp.add_argument("-f","--featureDelim", dest="cFeatureNameDelimiter", action= "store", metavar="Feature Name Delimiter", default="|", help="Feature delimiter") | |
| 34 | |
| 35 argp.add_argument("-n","--doNorm", dest="fDoNormData", action="store_true", default=False, help="Flag to turn on normalization") | |
| 36 argp.add_argument("-s","--doSum", dest="fDoSumData", action="store_true", default=False, help="Flag to turn on summation") | |
| 37 | |
| 38 argp.add_argument("-p","--paint", dest="sLabel", metavar= "Label", default=None, help="Label to paint in the PCoA") | |
| 39 argp.add_argument("-m","--metric", dest="strMetric", metavar = "distance", default = PCoA.c_BRAY_CURTIS, help ="Distance metric to use. Pick from braycurtis, canberra, chebyshev, cityblock, correlation, cosine, euclidean, hamming, spearman, sqeuclidean, unifrac_unweighted, unifrac_weighted") | |
| 40 argp.add_argument("-o","--outputFile", dest="strOutFile", metavar= "outputFile", default=None, help="Specify the path for the output figure.") | |
| 41 argp.add_argument("-D","--DistanceMatrix", dest="strFileDistanceMatrix", metavar= "strFileDistanceMatrix", default=None, help="Specify the path for outputing the distance matrix (if interested). Default this will not output.") | |
| 42 argp.add_argument("-C","--CoordinatesMatrix", dest="strFileCoordinatesMatrix", metavar= "strFileCoordinatesMatrix", default=None, help="Specify the path for outputing the x,y coordinates matrix (Dim 1 and 2). Default this will not output.") | |
| 43 | |
| 44 # Unifrac arguments | |
| 45 argp.add_argument("-t","--unifracTree", dest="istrmTree", metavar="UnifracTreeFile", default=None, help="Optional file only needed for UniFrac calculations.") | |
| 46 argp.add_argument("-e","--unifracEnv", dest="istrmEnvr", metavar="UnifracEnvFile", default=None, help="Optional file only needed for UniFrac calculations.") | |
| 47 argp.add_argument("-c","--unifracColor", dest="fileUnifracColor", metavar="UnifracColorFile", default = None, help="A text file indicating the groupings of metadata to color. Each line in the file is a group to color. An example file line would be 'GroupName:ID,ID,ID,ID'") | |
| 48 | |
| 49 argp.add_argument("strFileAbund", metavar = "Abundance file", nargs="?", help ="Input data file") | |
| 50 | |
| 51 args = argp.parse_args( ) | |
| 52 | |
| 53 #Read in abundance table | |
| 54 abndTable = None | |
| 55 if args.strFileAbund: | |
| 56 abndTable = AbundanceTable.funcMakeFromFile(args.strFileAbund, | |
| 57 cDelimiter = args.cFileDelimiter, | |
| 58 sMetadataID = args.sIDName, | |
| 59 sLastMetadata = args.sLastMetadataName, | |
| 60 cFeatureNameDelimiter= args.cFeatureNameDelimiter) | |
| 61 | |
| 62 #Normalize if need | |
| 63 if args.fDoSumData: | |
| 64 abndTable.funcSumClades() | |
| 65 | |
| 66 #Sum if needed | |
| 67 if args.fDoNormData: | |
| 68 abndTable.funcNormalize() | |
| 69 | |
| 70 #Get the metadata to paint | |
| 71 lsKeys = None | |
| 72 if abndTable: | |
| 73 lsKeys = abndTable.funcGetMetadataCopy().keys() if not args.sLabel else [args.sLabel] | |
| 74 | |
| 75 #Get pieces of output file | |
| 76 if not args.strOutFile: | |
| 77 if not args.strFileAbund: | |
| 78 args.strOutFile = os.path.splitext(os.path.basename(args.istrmEnvr))[0]+"-pcoa.pdf" | |
| 79 else: | |
| 80 args.strOutFile = os.path.splitext(os.path.basename(args.strFileAbund))[0]+"-pcoa.pdf" | |
| 81 lsFilePieces = os.path.splitext(args.strOutFile) | |
| 82 | |
| 83 # Make PCoA object | |
| 84 # Get PCoA object and plot | |
| 85 pcoa = PCoA() | |
| 86 if(not args.strMetric in [Metric.c_strUnifracUnweighted,Metric.c_strUnifracWeighted]) and abndTable: | |
| 87 pcoa.loadData(abndTable,True) | |
| 88 # Optional args.strFileDistanceMatrix if not none will force a printing of the distance measures to the path in args.strFileDistanceMatrix | |
| 89 pcoa.run(tempDistanceMetric=args.strMetric, iDims=2, strDistanceMatrixFile=args.strFileDistanceMatrix, istrmTree=args.istrmTree, istrmEnvr=args.istrmEnvr) | |
| 90 | |
| 91 # Write dim 1 and 2 coordinates to file | |
| 92 if args.strFileCoordinatesMatrix: | |
| 93 lsIds = pcoa.funcGetIDs() | |
| 94 mtrxCoordinates = pcoa.funcGetCoordinates() | |
| 95 csvrCoordinates = csv.writer(open(args.strFileCoordinatesMatrix, 'w')) | |
| 96 csvrCoordinates.writerow(["ID","Dimension_1","Dimension_2"]) | |
| 97 for x in xrange(mtrxCoordinates.shape[0]): | |
| 98 strId = lsIds[x] if lsIds else "" | |
| 99 csvrCoordinates.writerow([strId]+mtrxCoordinates[x].tolist()) | |
| 100 | |
| 101 # Paint metadata | |
| 102 if lsKeys: | |
| 103 for iIndex in xrange(len(lsKeys)): | |
| 104 lsMetadata = abndTable.funcGetMetadata(lsKeys[iIndex]) | |
| 105 | |
| 106 pcoa.plotList(lsLabelList = lsMetadata, | |
| 107 strOutputFileName = lsFilePieces[0]+"-"+lsKeys[iIndex]+lsFilePieces[1], | |
| 108 iSize=20, | |
| 109 dAlpha=1.0, | |
| 110 charForceColor=None, | |
| 111 charForceShape=None, | |
| 112 fInvert=False, | |
| 113 iDim1=1, | |
| 114 iDim2=2) | |
| 115 | |
| 116 if args.strMetric in [Metric.c_strUnifracUnweighted,Metric.c_strUnifracWeighted]: | |
| 117 | |
| 118 c_sNotGiven = "Not_specified" | |
| 119 | |
| 120 lsIds = pcoa.funcGetIDs() | |
| 121 lsGroupLabels = [c_sNotGiven for s in lsIds] | |
| 122 | |
| 123 if args.fileUnifracColor: | |
| 124 | |
| 125 # Read color file and make a dictionary to convert ids | |
| 126 lsColorLines = csv.reader(open(args.fileUnifracColor)) | |
| 127 dictConvertIDToGroup = {} | |
| 128 for lsLine in lsColorLines: | |
| 129 if lsLine: | |
| 130 sGroupID, sFirstID = lsLine[0].split(":") | |
| 131 dictConvertIDToGroup.update(dict([(sID,sGroupID) for sID in [sFirstID]+lsLine[1:]])) | |
| 132 | |
| 133 lsGroupLabels = [dictConvertIDToGroup.get(sID,c_sNotGiven) for sID in lsIds] | |
| 134 | |
| 135 pcoa.plotList(lsLabelList = lsGroupLabels, | |
| 136 strOutputFileName = lsFilePieces[0]+"-"+args.strMetric+lsFilePieces[1], | |
| 137 iSize=20, | |
| 138 dAlpha=1.0, | |
| 139 charForceColor=None, | |
| 140 charForceShape=None, | |
| 141 fInvert=False, | |
| 142 iDim1=1, | |
| 143 iDim2=2) |
