view bin/setTableDefaults.py @ 2:4b4d858c0aa9 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/ngsplot commit b'3611708386b2a2495ac9daee71a32cd990f2aa16\n'-dirty
author artbio
date Fri, 08 Dec 2017 09:36:11 -0500
parents 3ca58369469c
children
line wrap: on
line source

#!/usr/bin/env python

import sys
import string

# Flank size for each region type.  The value is looked up by region name and
# written, as a string, into the "Flank" column of the generated table
# (presumably a length in base pairs -- confirm against the table's consumer).
flankDict = {
    "tss": 2000,
    "tes": 2000,
    "genebody": 2000,
    "exon": 500,
    "cgi": 500,
    "enhancer": 1500,
    "dhs": 1000,
}

# Point label(s) for each region type.  The value is looked up by region name
# and written verbatim into the "PointLab" column of the generated table;
# regions with two labelled points use a single comma-separated string.
pointLabDict = {
    "tss": "TSS",
    "tes": "TES",
    "genebody": "TSS,TES",
    "exon": "Acceptor,Donor",
    "cgi": "Left,Right",
    "enhancer": "Enhancer",
    "dhs": "Left,Right",
}

# FItype = {
#     "tss":"endsite",
#     "tes":"endsite",
#     "genebody":"datatype",
#     "exon":"subregion",
#     "cgi":"subregion",
#     "enhancer":"subregion",
#     "dhs":"subregion"
# }

# Column names of the generated table, in output order.
TABLE_HEADER = ["Genome", "DefaultDB", "Region", "DefaultFI1",
                "DefaultFI2", "DefaultFI3", "PointLab", "Flank"]

# DefaultFI1..3 values for regions whose defaults depend only on the region.
_REGION_FI_DEFAULTS = {
    "cgi": ("NA", "ProximalPromoter", "protein_coding"),
    "dhs": ("H1hesc", "ProximalPromoter", "protein_coding"),
    "exon": ("chipseq", "canonical", "protein_coding"),
    "genebody": ("chipseq", "NA", "protein_coding"),
}

# DefaultFI1..3 values for the "enhancer" region, which also depend on the
# genome.  Only these genomes have enhancer defaults.
_ENHANCER_FI_DEFAULTS = {
    "hg19": ("H1hesc", "genebody", "protein_coding"),
    "mm9": ("mESC", "genebody", "protein_coding"),
}


def default_feature_intervals(genome, region):
    """Return the (DefaultFI1, DefaultFI2, DefaultFI3) values for one row.

    Args:
        genome: Genome name taken from the first input column.
        region: Region name taken from the third input column.

    Returns:
        A 3-tuple of strings.

    Raises:
        ValueError: If no defaults are defined for the combination.  The
            values used to be left over from the previously processed row
            in that case, which silently produced wrong output.
    """
    if region == "enhancer":
        defaults = _ENHANCER_FI_DEFAULTS.get(genome)
    else:
        defaults = _REGION_FI_DEFAULTS.get(region)
    if defaults is None:
        raise ValueError(
            "no default feature intervals defined for region %r of genome %r"
            % (region, genome))
    return defaults


def format_row(genome, default_db, region, fi_defaults):
    """Build one tab-separated, newline-terminated output row.

    The last two columns are looked up by region name in the module-level
    ``pointLabDict`` and ``flankDict`` tables.

    Raises:
        KeyError: If ``region`` is missing from either lookup table.
    """
    fields = [genome, default_db, region]
    fields.extend(fi_defaults)
    fields.append(pointLabDict[region])
    fields.append(str(flankDict[region]))
    return "\t".join(fields) + "\n"


def write_defaults_table(in_path, out_path):
    """Convert a genome/database/region listing into the defaults table.

    Each non-blank input line must hold at least three tab-separated fields:
    genome, default database and region.  One output row is written per input
    line; a "genebody" line additionally yields "tss" and "tes" rows that
    reuse the genebody feature-interval defaults.

    Args:
        in_path: Path of the tab-separated input file.
        out_path: Path of the table to create (overwritten if it exists).

    Raises:
        IndexError: If a non-blank line has fewer than three fields.
        ValueError: If a line has no feature-interval defaults defined.
    """
    # open() instead of the Python 2-only file() builtin; the with-statement
    # closes both files even when a row fails to convert.
    with open(in_path) as in_f, open(out_path, "w") as out_f:
        out_f.write("\t".join(TABLE_HEADER) + "\n")
        for line in in_f:
            line = line.rstrip()
            if not line:
                # Tolerate blank lines, e.g. a trailing newline at end of file.
                continue
            fields = line.split("\t")
            genome, default_db, region = fields[0], fields[1], fields[2]
            fi_defaults = default_feature_intervals(genome, region)
            out_f.write(format_row(genome, default_db, region, fi_defaults))

            # Extra: TSS and TES rows if region is genebody.
            if region == "genebody":
                for end_region in ("tss", "tes"):
                    out_f.write(format_row(genome, default_db, end_region,
                                           fi_defaults))


def main(argv=None):
    """Command-line entry point: ``setTableDefaults.py <input> <output>``."""
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        sys.exit("Usage: setTableDefaults.py <input_table> <output_table>")
    write_defaults_table(argv[1], argv[2])


if __name__ == "__main__":
    main()