comparison fasta_extract.py @ 12:da6ab598f025 draft

Uploaded
author greg
date Sun, 10 Jan 2016 14:53:59 -0500
parents 48f6e9f1c19d
children a5d7ed2680c3
comparison
equal deleted inserted replaced
11:48f6e9f1c19d 12:da6ab598f025
22 format = 'gff' 22 format = 'gff'
23 output_dir = 'output_orphan_dir' 23 output_dir = 'output_orphan_dir'
24 else: 24 else:
25 format = 'fasta' 25 format = 'fasta'
26 output_dir = 'output_dir' 26 output_dir = 'output_dir'
27 return os.path.join(output_dir, '%s_on_data_%d.%s' % (attrs, hid, format)) 27 return os.path.join(output_dir, '%s_on_data_%s.%s' % (attrs, hid, format))
28 28
29 29
30 def stop_err(msg): 30 def stop_err(msg):
31 sys.stderr.write(msg) 31 sys.stderr.write(msg)
32 sys.exit(1) 32 sys.exit(1)
41 parser.add_argument('--strand', dest='strand', help='Consider strandedness: reverse complement extracted sequence on reverse strand.') 41 parser.add_argument('--strand', dest='strand', help='Consider strandedness: reverse complement extracted sequence on reverse strand.')
42 args = parser.parse_args() 42 args = parser.parse_args()
43 43
44 fasta = Fasta(args.genome_file) 44 fasta = Fasta(args.genome_file)
45 45
46 dh = open('debug.log', 'wb')
46 for (input_filename, hid) in args.inputs: 47 for (input_filename, hid) in args.inputs:
47 hid = int(hid)
48 extend_existing = args.extend_existing == 'existing' 48 extend_existing = args.extend_existing == 'existing'
49 consider_strand = args.strand == 'yes' 49 consider_strand = args.strand == 'yes'
50
50 reader = csv.reader(open(input_filename, 'rU'), delimiter='\t') 51 reader = csv.reader(open(input_filename, 'rU'), delimiter='\t')
51 fasta_output_path = get_output_path(hid, 52 fasta_output_path = get_output_path(hid,
52 args.subtract_from_start, 53 args.subtract_from_start,
53 args.add_to_end, 54 args.add_to_end,
54 extend_existing, 55 extend_existing,
55 consider_strand) 56 consider_strand)
57 dh.write('\n fasta_output_path: %s\n' % str(fasta_output_path))
56 output = open(fasta_output_path, 'wb') 58 output = open(fasta_output_path, 'wb')
57 gff_output_path = get_output_path(hid, 59 gff_output_path = get_output_path(hid,
58 args.subtract_from_start, 60 args.subtract_from_start,
59 args.add_to_end, 61 args.add_to_end,
60 extend_existing, 62 extend_existing,
61 consider_strand, 63 consider_strand,
62 orphan=True) 64 orphan=True)
65 dh.write('\n gff_output_path: %s\n' % str(gff_output_path))
63 orphan_writer = csv.writer(open(gff_output_path, 'wb'), delimiter='\t') 66 orphan_writer = csv.writer(open(gff_output_path, 'wb'), delimiter='\t')
67
64 for row in reader: 68 for row in reader:
69 dh.write('\n row: %s\n' % str(row))
65 if len(row) != 9 or row[0].startswith('#'): 70 if len(row) != 9 or row[0].startswith('#'):
66 continue 71 continue
67 try: 72 try:
68 cname = row[0] 73 cname = row[0]
69 start = int(row[3]) 74 start = int(row[3])
79 if 1 <= start and end <= len(fasta[cname]): 84 if 1 <= start and end <= len(fasta[cname]):
80 output.write('>%s:%s-%s_%s\n' % (cname, start, end, strand)) 85 output.write('>%s:%s-%s_%s\n' % (cname, start, end, strand))
81 bases = fasta[cname][start-1:end] 86 bases = fasta[cname][start-1:end]
82 if consider_strand and strand == '-': 87 if consider_strand and strand == '-':
83 bases = reverse_complement(bases) 88 bases = reverse_complement(bases)
89 dh.write('\n bases: %s\n' % str(bases))
84 output.write('%s\n' % bases) 90 output.write('%s\n' % bases)
85 else: 91 else:
86 orphan_writer.writerow(row) 92 orphan_writer.writerow(row)
87 except Exception, e: 93 except Exception, e:
88 stop_err(str(e)) 94 stop_err(str(e))
89 finally: 95 finally:
90 output.close() 96 output.close()
97 dh.close()