0
|
1 #!/usr/bin/python
|
|
2 #-*- coding: utf-8 -*-
|
|
3
|
|
4 """
|
|
5
|
|
6 Script that extracts the same sub-sequence (given start/end positions) from every record of a multi-FASTA file.
|
|
7 Réalisé par Pierrick Lucas.
|
|
8 Usage: python <script>.py --input_file sequences.fasta --start_position 0 --end_position 100 [--output_file subseq.fasta]
|
|
9
|
|
10 """
|
|
11
|
|
12 # Importation des librairies.
|
|
13 import os, sys, argparse, re
|
|
14 from Bio import SeqIO
|
|
15
|
|
16
|
|
17 ##### MAIN
|
|
def __main__():
    """Extract the same sub-sequence from every record of a multi-FASTA file.

    Command-line options:
        -i / --input_file      Multi-FASTA file to read (required).
        -s / --start_position  Start of the slice (required, integer).
        -e / --end_position    End of the slice (required, integer).
        -o / --output_file     Output path. When omitted, the output is written
                               to "<input without extension>_extract_subseq_
                               <start>_to_<end>.fasta".

    The positions are applied as a plain Python slice ``seq[start:end]``
    (0-based start, exclusive end). For each input record the output gets a
    ``>`` header line holding the record id, followed by the sliced sequence
    on a single line.

    Exit status:
        1 -- help is printed because a required option is missing.
        2 -- argparse error: a position is not an integer, or the input
             file does not exist.
    """
    # Options:
    parser = argparse.ArgumentParser(
        description='''Extract sequence of multifasta file from position start/end.''',
        epilog='''This script need few options, use -h to see it.''')
    parser.add_argument('-i', '--input_file', dest='infile', help='Multifasta file.')
    parser.add_argument('-s', '--start_position', dest='start', help='Start position to extract.')
    parser.add_argument('-e', '--end_position', dest='end', help='End position to extract.')
    parser.add_argument('-o', '--output_file', dest='outfile', help='Output file.')

    # Get options:
    options = parser.parse_args()
    infile = options.infile
    spos = options.start
    epos = options.end
    outfile = options.outfile

    # Check the parsed values instead of counting sys.argv tokens: this also
    # accepts the "--opt=value" spelling and catches a missing option
    # whatever the total number of tokens is.
    if infile is None or spos is None or epos is None:
        parser.print_help()
        sys.exit(1)

    # Validate everything BEFORE creating the output file, so that a bad
    # command line never leaves an empty output file behind.
    try:
        start = int(spos)
        end = int(epos)
    except ValueError:
        parser.error("start and end positions must be integers "
                     "(got %r and %r)" % (spos, epos))

    if not os.path.exists(infile):
        parser.error("input file not found: %s" % infile)

    # Default output name is built from the raw option strings, exactly as
    # they were typed on the command line.
    if outfile is None:
        withoutext, _ = os.path.splitext(infile)
        outfile = withoutext + "_extract_subseq_" + spos + "_to_" + epos + ".fasta"

    # Outputs:
    # The context manager closes the output file even if parsing fails.
    # SeqIO.parse is given the file name and opens the input itself, which
    # avoids the "rU" open mode (removed in Python 3.11).
    with open(outfile, "w") as outputfile:
        for rec in SeqIO.parse(infile, "fasta"):
            outputfile.write('>' + rec.id + '\n')
            outputfile.write(str(rec.seq[start:end]) + '\n')


#### MAIN END
if __name__ == "__main__":
    __main__()