annotate exseq_multi.py @ 6:612269711364 draft default tip

Uploaded
author p.lucas
date Wed, 26 Sep 2018 08:48:08 -0400
parents 746e286edff0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
746e286edff0 Uploaded
p.lucas
parents:
diff changeset
1 #!/usr/bin/python
746e286edff0 Uploaded
p.lucas
parents:
diff changeset
2 #-*- coding: utf-8 -*-
746e286edff0 Uploaded
p.lucas
parents:
diff changeset
3
746e286edff0 Uploaded
p.lucas
parents:
diff changeset
4 """
746e286edff0 Uploaded
p.lucas
parents:
diff changeset
5
746e286edff0 Uploaded
p.lucas
parents:
diff changeset
6 Script that extracts the same sub-sequence (given start/end positions) from every sequence of a multi-FASTA file.
746e286edff0 Uploaded
p.lucas
parents:
diff changeset
7 Written by Pierrick Lucas.
746e286edff0 Uploaded
p.lucas
parents:
diff changeset
8 Usage : python exseq_multi.py --input_file sequences.fasta --start_position 0 --end_position 100 [--output_file subseq.fasta]
746e286edff0 Uploaded
p.lucas
parents:
diff changeset
9
746e286edff0 Uploaded
p.lucas
parents:
diff changeset
10 """
746e286edff0 Uploaded
p.lucas
parents:
diff changeset
11
746e286edff0 Uploaded
p.lucas
parents:
diff changeset
12 # Importation des librairies.
746e286edff0 Uploaded
p.lucas
parents:
diff changeset
13 import os, sys, argparse, re
746e286edff0 Uploaded
p.lucas
parents:
diff changeset
14 from Bio import SeqIO
746e286edff0 Uploaded
p.lucas
parents:
diff changeset
15
746e286edff0 Uploaded
p.lucas
parents:
diff changeset
16
746e286edff0 Uploaded
p.lucas
parents:
diff changeset
17 ##### MAIN
746e286edff0 Uploaded
p.lucas
parents:
diff changeset
def __main__():
    """Extract the same sub-sequence from every record of a multi-FASTA file.

    Command-line options (read from ``sys.argv``):
        -i / --input_file      Multi-FASTA file to read (required).
        -s / --start_position  Start of the slice (required, integer).
        -e / --end_position    End of the slice (required, integer).
        -o / --output_file     Output path (optional). When omitted the
                               result is written to
                               ``<input without extension>_extract_subseq_<start>_to_<end>.fasta``.

    For each record parsed from the input, two lines are written: ``>``
    followed by the record id, then ``rec.seq[start:end]``. The positions
    are used directly as Python slice bounds.

    Exits with status 1 (after printing the help) when a required option
    is missing, and through ``parser.error`` (status 2) when a position is
    not an integer.
    """
    # Options:
    parser = argparse.ArgumentParser(
        description='''Extract sequence of multifasta file from position start/end.''',
        epilog='''This script need few options, use -h to see it.''')
    parser.add_argument('-i', '--input_file', dest='infile', help='Multifasta file.')
    parser.add_argument('-s', '--start_position', dest='start', help='Start position to extract.')
    parser.add_argument('-e', '--end_position', dest='end', help='End position to extract.')
    parser.add_argument('-o', '--output_file', dest='outfile', help='Output file.')

    options = parser.parse_args()

    # Check the parsed values rather than len(sys.argv): counting raw tokens
    # rejects valid spellings such as ``--input_file=seqs.fasta``.
    if options.infile is None or options.start is None or options.end is None:
        parser.print_help()
        sys.exit(1)

    # Convert the slice bounds once, before any file is touched, so a bad
    # value cannot leave a truncated output file behind.
    try:
        start = int(options.start)
        end = int(options.end)
    except ValueError:
        parser.error("start and end positions must be integers")

    # Default output name is derived from the input path and the positions
    # exactly as they were typed on the command line.
    outfile = options.outfile
    if outfile is None:
        withoutext, _ = os.path.splitext(options.infile)
        outfile = (withoutext + "_extract_subseq_" + options.start
                   + "_to_" + options.end + ".fasta")

    # Plain "r" instead of "rU": the "U" flag was removed in Python 3.11 and
    # text mode already applies universal newlines on Python 3.
    # The input is opened first so a missing input does not create an output
    # file; both handles are closed even if parsing fails.
    with open(options.infile, "r") as inf, open(outfile, "w") as outputfile:
        for rec in SeqIO.parse(inf, "fasta"):
            outputfile.write('>' + rec.id + '\n')
            outputfile.write(str(rec.seq[start:end]) + '\n')
746e286edff0 Uploaded
p.lucas
parents:
diff changeset
54
746e286edff0 Uploaded
p.lucas
parents:
diff changeset
55 #### MAIN END
746e286edff0 Uploaded
p.lucas
parents:
diff changeset
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    __main__()