Mercurial > repos > p.lucas > extract_fasta_subsequence
view exseq_multi.py @ 0:746e286edff0 draft
Uploaded
author | p.lucas |
---|---|
date | Wed, 26 Sep 2018 08:26:17 -0400 |
parents | |
children | 612269711364 |
line wrap: on
line source
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Extract the same sub-sequence from every record of a multi-FASTA file.

Written by Pierrick Lucas.

Usage:
    python exseq_multi.py -i sequences.fasta -s START -e END [-o output.fasta]

START and END are applied directly as Python slice bounds
(``record.seq[START:END]``): START is 0-based and END is exclusive.
"""

# Library imports.
import os
import sys
import argparse
import re

from Bio import SeqIO


##### MAIN
def __main__():
    """Parse the command line and write the sliced records to a FASTA file.

    Command-line options:
        -i / --input_file      Path of the multi-FASTA file to read (required).
        -s / --start_position  Slice start, integer, 0-based (required).
        -e / --end_position    Slice end, integer, exclusive (required).
        -o / --output_file     Output path. When omitted, the output is
                               written next to the input as
                               ``<input without extension>_extract_subseq_<start>_to_<end>.fasta``.

    For each input record, one header line (``>`` followed by the record id)
    and one line holding the sliced sequence are written.

    Exits with status 1 after printing the help when called without any
    argument; argparse reports missing or malformed options itself.
    """
    # Options :
    parser = argparse.ArgumentParser(
        description='''Extract sequence of multifasta file from position start/end.''',
        epilog='''This script need few options, use -h to see it.''')
    parser.add_argument('-i', '--input_file', dest='infile', required=True,
                        help='Multifasta file.')
    parser.add_argument('-s', '--start_position', dest='start', type=int,
                        required=True, help='Start position to extract.')
    parser.add_argument('-e', '--end_position', dest='end', type=int,
                        required=True, help='End position to extract.')
    parser.add_argument('-o', '--output_file', dest='outfile',
                        help='Output file.')

    # Error : a bare invocation gets the full help instead of a terse usage
    # message. Every other validation (missing option, non-integer position)
    # is delegated to argparse rather than counting sys.argv entries, which
    # broke on the "--option=value" spelling.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    # Get options :
    options = parser.parse_args()
    infile = options.infile
    spos = options.start
    epos = options.end
    outfile = options.outfile

    # Variables : derive a default output name from the input path.
    if outfile is None:
        withoutext, _ = os.path.splitext(infile)
        outfile = "{0}_extract_subseq_{1}_to_{2}.fasta".format(
            withoutext, spos, epos)

    # Outputs : the context manager closes the output even if parsing fails.
    with open(outfile, "w") as outputfile:
        # Give SeqIO the path so it opens the file itself; the former
        # open(infile, "rU") raises ValueError on Python >= 3.11.
        for rec in SeqIO.parse(infile, "fasta"):
            # A FASTA header line must start with '>'.
            outputfile.write(">" + rec.id + "\n")
            outputfile.write(str(rec.seq[spos:epos]) + "\n")
#### MAIN END


if __name__ == "__main__":
    __main__()