annotate mergegffs.py @ 6:77c005f344df draft

planemo upload commit 7bc2a91afe5fcf29893e582382aa874eacb1c1ca-dirty
author eduardo
date Fri, 07 Apr 2017 18:39:37 -0400
parents 2b2efd0c4df8
children 79d6bb2e6a5d
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2b2efd0c4df8 planemo upload commit 7bc2a91afe5fcf29893e582382aa874eacb1c1ca
eduardo
parents:
diff changeset
1 import sys
2b2efd0c4df8 planemo upload commit 7bc2a91afe5fcf29893e582382aa874eacb1c1ca
eduardo
parents:
diff changeset
2 import os
2b2efd0c4df8 planemo upload commit 7bc2a91afe5fcf29893e582382aa874eacb1c1ca
eduardo
parents:
diff changeset
3 import gffutils
2b2efd0c4df8 planemo upload commit 7bc2a91afe5fcf29893e582382aa874eacb1c1ca
eduardo
parents:
diff changeset
4 import argparse
2b2efd0c4df8 planemo upload commit 7bc2a91afe5fcf29893e582382aa874eacb1c1ca
eduardo
parents:
diff changeset
5
2b2efd0c4df8 planemo upload commit 7bc2a91afe5fcf29893e582382aa874eacb1c1ca
eduardo
parents:
diff changeset
6
2b2efd0c4df8 planemo upload commit 7bc2a91afe5fcf29893e582382aa874eacb1c1ca
eduardo
parents:
diff changeset
7
2b2efd0c4df8 planemo upload commit 7bc2a91afe5fcf29893e582382aa874eacb1c1ca
eduardo
parents:
diff changeset
8
2b2efd0c4df8 planemo upload commit 7bc2a91afe5fcf29893e582382aa874eacb1c1ca
eduardo
parents:
diff changeset
def _annotation_jobs(args):
    """Return ``(command, gff_path)`` pairs for each annotation input supplied.

    ``command`` is an argv list for the external converter script and
    ``gff_path`` is the file its standard output is captured into.  Options
    that were not given on the command line (``None``) or are empty are
    skipped.  The order matches the original script: blastp, signalP, tmhmm,
    pfam.
    """
    jobs = []
    if args.blastp:
        # NOTE(review): "-g" is immediately followed by "-d", exactly as in
        # the original command string -- confirm against blast2genomegff.py
        # that this is the intended flag layout.
        jobs.append((
            ["blast2genomegff.py", "-b", args.blastp, "-g", "-d",
             args.peptides, "-p", "blastp", "-t", "protein_match", "-T", "-x"],
            "blastp.gff",
        ))
    if args.signalP:
        jobs.append((
            ["convert2gff.py", "-i", args.signalP, "-g", args.peptides,
             "-T", "-t", "signalP"],
            "signalp.gff",
        ))
    if args.tmhmm:
        jobs.append((
            ["convert2gff.py", "-i", args.tmhmm, "-g", args.peptides,
             "-T", "-t", "tmhmm"],
            "tmhmm.gff",
        ))
    if args.pfam:
        jobs.append((
            ["pfam2gff.py", "-i", args.pfam, "-g", args.peptides, "-T"],
            "PFAM.gff",
        ))
    return jobs


def _run_converter(command, gff_path):
    """Run ``command`` and write its standard output to ``gff_path``.

    The command is executed without a shell, so file names containing spaces
    or shell metacharacters are passed through unchanged.

    Raises:
        subprocess.CalledProcessError: if the converter exits non-zero.
        OSError: if the converter cannot be started or ``gff_path`` cannot
            be opened for writing.
    """
    # Imported here so this block does not depend on the file-level imports.
    import subprocess

    with open(gff_path, "w") as gff_out:
        subprocess.check_call(command, stdout=gff_out)


def main(argv, wayout):
    """Merge peptide annotations into one GFF file.

    A gffutils database is created from the peptide-prediction GFF.  For each
    optional annotation input that was supplied (blastp, signalP, tmhmm,
    pfam) the matching converter script is run, its GFF output is written to
    a fixed file name in the current directory, and that GFF is added to the
    database.  Finally every feature in the database is written to the
    output file, one per line.

    Args:
        argv: command-line arguments without the program name.  An empty
            sequence is treated as ``["-h"]``; the caller's list is not
            modified.
        wayout: accepted for interface compatibility; not used.

    Raises:
        SystemExit: raised by argparse for ``-h`` or invalid arguments.
        subprocess.CalledProcessError: if a converter script fails.
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, description=__doc__)
    # --genemap is parsed but not used anywhere in this function.
    parser.add_argument('-m', '--genemap', help="mapping of genes to transcripts")
    parser.add_argument('-p', '--peptides', required=True, help="peptide prediction gff")
    parser.add_argument('-b', '--blastp', help="blastp outfmt6 results for peptides")
    parser.add_argument('-f', '--pfam', help="hmmer results for peptides")
    parser.add_argument('-t', '--tmhmm', help="tmhmm results for peptides")
    parser.add_argument('-s', '--signalP', help="signalP results for peptides")
    parser.add_argument('-d', '--database', required=True, help="gff database to load or create")
    parser.add_argument('-o', '--output', required=True, help="output in gff format")
    # Show the help text when called with no arguments; work on a copy so
    # the caller's list is left untouched.
    args = parser.parse_args(list(argv) or ["-h"])

    db = gffutils.create_db(args.peptides, dbfn=args.database, force=True, keep_order=True, merge_strategy='merge', sort_attribute_values=True)

    for command, gff_path in _annotation_jobs(args):
        _run_converter(command, gff_path)
        # Load the GFF that was just generated, not the raw tool output.
        # The return value of update() is deliberately ignored.
        # NOTE(review): update() is documented as updating the on-disk
        # database; confirm the existing handle reflects it for the
        # installed gffutils version.
        db.update(gff_path)

    with open(args.output, 'w') as fout:
        for feature in db.all_features():
            fout.write(str(feature) + '\n')
2b2efd0c4df8 planemo upload commit 7bc2a91afe5fcf29893e582382aa874eacb1c1ca
eduardo
parents:
diff changeset
if __name__ == "__main__":
    # Script entry point: pass everything after the program name to main(),
    # together with standard output as the second argument.
    cli_args = sys.argv[1:]
    main(cli_args, sys.stdout)