diff cherry_pick_fasta.py @ 4:30d0cba983be draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit 8384f1bb5378232bbe78319e06a3522674c7c1fe"
author artbio
date Fri, 08 Apr 2022 16:56:08 +0000
parents d04b7de95782
children 0ca9683dbcae
line wrap: on
line diff
--- a/cherry_pick_fasta.py	Tue Apr 05 23:41:56 2022 +0000
+++ b/cherry_pick_fasta.py	Fri Apr 08 16:56:08 2022 +0000
@@ -1,10 +1,5 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# Chery pick of fasta sequences satisfying a query string in their header/name
 import argparse
 
-from Bio import SeqIO
-
 
 def Parser():
     the_parser = argparse.ArgumentParser(
@@ -68,7 +63,17 @@
 
 
 def buid_fasta_dict(fasta):
-    seq_dict = {rec.id: rec.seq for rec in SeqIO.parse(fasta, "fasta")}
+    """Return {header line without '>': joined sequence} for file *fasta*."""
+    seq_dict = dict()
+    # 'with' guarantees the handle is closed, even if read() raises.
+    with open(fasta, 'r') as f:
+        content = f.read()
+    # Split on line-initial '>' only: a '>' inside a header or sequence
+    # line is data, not a record boundary. Text before the first record
+    # lands in element 0 and is discarded.
+    for record in ('\n' + content).split('\n>')[1:]:
+        lines = record.split('\n')  # header first, then wrapped sequence
+        seq_dict[lines[0]] = ''.join(lines[1:])
     return seq_dict