| 0 | 1 #!/usr/bin/env python | 
|  | 2 """ | 
|  | 3 Author: Timothy Tickle | 
|  | 4 Description: Make PCoA of an abundance file | 
|  | 5 """ | 
|  | 6 | 
|  | 7 __author__ = "Timothy Tickle" | 
|  | 8 __copyright__ = "Copyright 2012" | 
|  | 9 __credits__ = ["Timothy Tickle"] | 
|  | 10 __license__ = "" | 
|  | 11 __version__ = "" | 
|  | 12 __maintainer__ = "Timothy Tickle" | 
|  | 13 __email__ = "ttickle@sph.harvard.edu" | 
|  | 14 __status__ = "Development" | 
|  | 15 | 
|  | 16 import sys | 
|  | 17 import argparse | 
|  | 18 from src.AbundanceTable import AbundanceTable | 
|  | 19 from src.Metric import Metric | 
|  | 20 import csv | 
|  | 21 import os | 
|  | 22 from src.PCoA import PCoA | 
|  | 23 | 
|  | 24 #Set up arguments reader | 
|  | 25 argp = argparse.ArgumentParser( prog = "scriptPcoa.py", | 
|  | 26     description = """PCoAs an abundance file given a metadata.\nExample:python scriptPcoa.py -i TID -l STSite""" ) | 
|  | 27 | 
|  | 28 #Arguments | 
|  | 29 #For table | 
|  | 30 argp.add_argument("-i","--id", dest="sIDName", default="ID", help="Abundance Table ID") | 
|  | 31 argp.add_argument("-l","--meta", dest="sLastMetadataName", help="Last metadata name") | 
|  | 32 argp.add_argument("-d","--fDelim", dest= "cFileDelimiter", action= "store", default="\t", help="File delimiter, default tab") | 
|  | 33 argp.add_argument("-f","--featureDelim", dest="cFeatureNameDelimiter", action= "store", metavar="Feature Name Delimiter", default="|", help="Feature delimiter") | 
|  | 34 | 
|  | 35 argp.add_argument("-n","--doNorm", dest="fDoNormData", action="store_true", default=False, help="Flag to turn on normalization") | 
|  | 36 argp.add_argument("-s","--doSum", dest="fDoSumData", action="store_true", default=False, help="Flag to turn on summation") | 
|  | 37 | 
|  | 38 argp.add_argument("-p","--paint", dest="sLabel", metavar= "Label", default=None, help="Label to paint in the PCoA") | 
|  | 39 argp.add_argument("-m","--metric", dest="strMetric", metavar = "distance", default = PCoA.c_BRAY_CURTIS, help ="Distance metric to use. Pick from braycurtis, canberra, chebyshev, cityblock, correlation, cosine, euclidean, hamming, spearman, sqeuclidean, unifrac_unweighted, unifrac_weighted") | 
|  | 40 argp.add_argument("-o","--outputFile", dest="strOutFile", metavar= "outputFile", default=None, help="Specify the path for the output figure.") | 
|  | 41 argp.add_argument("-D","--DistanceMatrix", dest="strFileDistanceMatrix", metavar= "strFileDistanceMatrix", default=None, help="Specify the path for outputing the distance matrix (if interested). Default this will not output.") | 
|  | 42 argp.add_argument("-C","--CoordinatesMatrix", dest="strFileCoordinatesMatrix", metavar= "strFileCoordinatesMatrix", default=None, help="Specify the path for outputing the x,y coordinates matrix (Dim 1 and 2). Default this will not output.") | 
|  | 43 | 
|  | 44 # Unifrac arguments | 
|  | 45 argp.add_argument("-t","--unifracTree", dest="istrmTree", metavar="UnifracTreeFile", default=None, help="Optional file only needed for UniFrac calculations.") | 
|  | 46 argp.add_argument("-e","--unifracEnv", dest="istrmEnvr", metavar="UnifracEnvFile", default=None, help="Optional file only needed for UniFrac calculations.") | 
|  | 47 argp.add_argument("-c","--unifracColor", dest="fileUnifracColor", metavar="UnifracColorFile", default = None, help="A text file indicating the groupings of metadata to color. Each line in the file is a group to color. An example file line would be  'GroupName:ID,ID,ID,ID'") | 
|  | 48 | 
|  | 49 argp.add_argument("strFileAbund", metavar = "Abundance file", nargs="?", help ="Input data file") | 
|  | 50 | 
|  | 51 args = argp.parse_args( ) | 
|  | 52 | 
|  | 53 #Read in abundance table | 
|  | 54 abndTable = None | 
|  | 55 if args.strFileAbund: | 
|  | 56   abndTable = AbundanceTable.funcMakeFromFile(args.strFileAbund, | 
|  | 57                              cDelimiter = args.cFileDelimiter, | 
|  | 58                              sMetadataID = args.sIDName, | 
|  | 59                              sLastMetadata = args.sLastMetadataName, | 
|  | 60                              cFeatureNameDelimiter= args.cFeatureNameDelimiter) | 
|  | 61 | 
|  | 62   #Normalize if need | 
|  | 63   if args.fDoSumData: | 
|  | 64     abndTable.funcSumClades() | 
|  | 65 | 
|  | 66   #Sum if needed | 
|  | 67   if args.fDoNormData: | 
|  | 68     abndTable.funcNormalize() | 
|  | 69 | 
|  | 70 #Get the metadata to paint | 
|  | 71 lsKeys = None | 
|  | 72 if abndTable: | 
|  | 73   lsKeys = abndTable.funcGetMetadataCopy().keys() if not args.sLabel else [args.sLabel] | 
|  | 74 | 
|  | 75 #Get pieces of output file | 
|  | 76 if not args.strOutFile: | 
|  | 77   if not args.strFileAbund: | 
|  | 78     args.strOutFile = os.path.splitext(os.path.basename(args.istrmEnvr))[0]+"-pcoa.pdf" | 
|  | 79   else: | 
|  | 80     args.strOutFile = os.path.splitext(os.path.basename(args.strFileAbund))[0]+"-pcoa.pdf" | 
|  | 81 lsFilePieces = os.path.splitext(args.strOutFile) | 
|  | 82 | 
|  | 83 # Make PCoA object | 
|  | 84 # Get PCoA object and plot | 
|  | 85 pcoa = PCoA() | 
|  | 86 if(not args.strMetric in [Metric.c_strUnifracUnweighted,Metric.c_strUnifracWeighted]) and abndTable: | 
|  | 87   pcoa.loadData(abndTable,True) | 
|  | 88 # Optional args.strFileDistanceMatrix if not none will force a printing of the distance measures to the path in args.strFileDistanceMatrix | 
|  | 89 pcoa.run(tempDistanceMetric=args.strMetric, iDims=2, strDistanceMatrixFile=args.strFileDistanceMatrix, istrmTree=args.istrmTree, istrmEnvr=args.istrmEnvr) | 
|  | 90 | 
|  | 91 # Write dim 1 and 2 coordinates to file | 
|  | 92 if args.strFileCoordinatesMatrix: | 
|  | 93   lsIds = pcoa.funcGetIDs() | 
|  | 94   mtrxCoordinates = pcoa.funcGetCoordinates() | 
|  | 95   csvrCoordinates = csv.writer(open(args.strFileCoordinatesMatrix, 'w')) | 
|  | 96   csvrCoordinates.writerow(["ID","Dimension_1","Dimension_2"]) | 
|  | 97   for x in xrange(mtrxCoordinates.shape[0]): | 
|  | 98     strId = lsIds[x] if lsIds else "" | 
|  | 99     csvrCoordinates.writerow([strId]+mtrxCoordinates[x].tolist()) | 
|  | 100 | 
|  | 101 # Paint metadata | 
|  | 102 if lsKeys: | 
|  | 103   for iIndex in xrange(len(lsKeys)): | 
|  | 104     lsMetadata = abndTable.funcGetMetadata(lsKeys[iIndex]) | 
|  | 105 | 
|  | 106     pcoa.plotList(lsLabelList = lsMetadata, | 
|  | 107       strOutputFileName = lsFilePieces[0]+"-"+lsKeys[iIndex]+lsFilePieces[1], | 
|  | 108       iSize=20, | 
|  | 109       dAlpha=1.0, | 
|  | 110       charForceColor=None, | 
|  | 111       charForceShape=None, | 
|  | 112       fInvert=False, | 
|  | 113       iDim1=1, | 
|  | 114       iDim2=2) | 
|  | 115 | 
|  | 116 if args.strMetric in [Metric.c_strUnifracUnweighted,Metric.c_strUnifracWeighted]: | 
|  | 117 | 
|  | 118   c_sNotGiven = "Not_specified" | 
|  | 119 | 
|  | 120   lsIds = pcoa.funcGetIDs() | 
|  | 121   lsGroupLabels = [c_sNotGiven for s in lsIds] | 
|  | 122 | 
|  | 123   if args.fileUnifracColor: | 
|  | 124 | 
|  | 125     # Read color file and make a dictionary to convert ids | 
|  | 126     lsColorLines = csv.reader(open(args.fileUnifracColor)) | 
|  | 127     dictConvertIDToGroup = {} | 
|  | 128     for lsLine in lsColorLines: | 
|  | 129       if lsLine: | 
|  | 130         sGroupID, sFirstID = lsLine[0].split(":") | 
|  | 131         dictConvertIDToGroup.update(dict([(sID,sGroupID) for sID in [sFirstID]+lsLine[1:]])) | 
|  | 132 | 
|  | 133     lsGroupLabels = [dictConvertIDToGroup.get(sID,c_sNotGiven) for sID in lsIds] | 
|  | 134 | 
|  | 135   pcoa.plotList(lsLabelList = lsGroupLabels, | 
|  | 136       strOutputFileName = lsFilePieces[0]+"-"+args.strMetric+lsFilePieces[1], | 
|  | 137       iSize=20, | 
|  | 138       dAlpha=1.0, | 
|  | 139       charForceColor=None, | 
|  | 140       charForceShape=None, | 
|  | 141       fInvert=False, | 
|  | 142       iDim1=1, | 
|  | 143       iDim2=2) |