view exseq_multi.py @ 0:746e286edff0 draft

Uploaded
author p.lucas
date Wed, 26 Sep 2018 08:26:17 -0400
parents
children 612269711364
line wrap: on
line source

#!/usr/bin/python
#-*- coding: utf-8 -*-

"""

Script that extracts the same sub-sequence (start/end positions) from every record of a multi-FASTA file.
Written by Pierrick Lucas.
Usage : python exseq_multi.py --input_file sequences.fasta --start_position START --end_position END [--output_file OUT]

"""

# Importation des librairies.
import os, sys, argparse, re
from Bio import SeqIO


##### MAIN
def __main__():
    """Extract the slice [start:end] of every record of a multi-FASTA file.

    Command-line options (read from ``sys.argv``):
      -i / --input_file      multi-FASTA file to read (required).
      -s / --start_position  slice start; used as-is in a Python slice, so it
                             is 0-based and inclusive (required).
      -e / --end_position    slice end; used as-is in a Python slice, so it
                             is exclusive (required).
      -o / --output_file     output path. When omitted, the output is written
                             to ``<input without extension>_extract_subseq_
                             <start>_to_<end>.fasta``.

    Every record is written as a ``>id`` header line followed by the
    extracted sub-sequence on a single line.

    Exit status: 1 (after printing the help) when called without arguments
    or when a required option is missing; 2 (argparse usage error) when a
    position is not an integer.
    """
    # Options :
    parser = argparse.ArgumentParser(description='''Extract sequence of multifasta file from position start/end.''', epilog='''This script need few options, use -h to see it.''')
    parser.add_argument('-i', '--input_file', dest='infile', help='Multifasta file.')
    parser.add_argument('-s', '--start_position', dest='start', help='Start position to extract.')
    parser.add_argument('-e', '--end_position', dest='end', help='End position to extract.')
    parser.add_argument('-o', '--output_file', dest='outfile', help='Output file.')

    # No argument at all: show the help and fail, as before.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    # Get options :
    options = parser.parse_args()
    infile = options.infile
    spos = options.start
    epos = options.end
    outfile = options.outfile

    # Check the parsed values instead of counting sys.argv entries: counting
    # lets "-i X -s 1 -o Y" through (no end position) and rejects the valid
    # "--input_file=X" spelling.
    if infile is None or spos is None or epos is None:
        parser.print_help()
        sys.exit(1)

    # Convert the positions once, before any file is touched, so that a bad
    # value neither leaves an empty output file behind nor is re-parsed for
    # every record.
    try:
        start = int(spos)
        end = int(epos)
    except ValueError:
        parser.error("start and end positions must be integers (got %r and %r)" % (spos, epos))

    # Default output name is derived from the input name and the positions
    # exactly as typed on the command line.
    if outfile is None:
        outfile = os.path.splitext(infile)[0] + "_extract_subseq_" + spos + "_to_" + epos + ".fasta"

    # The input is opened first so that a missing input file does not create
    # or truncate the output. Plain "r" is used because the "U" flag was
    # removed in Python 3.11. Both handles are closed even if parsing fails.
    with open(infile, "r") as inf, open(outfile, "w") as outputfile:
        for rec in SeqIO.parse(inf, "fasta"):
            # A FASTA header line must start with '>'; without it the output
            # cannot be read back as FASTA.
            outputfile.write(">" + rec.id + "\n")
            outputfile.write(str(rec.seq[start:end]) + "\n")

#### MAIN END
# Script entry point: run the extraction only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    __main__()