comparison fasta_extract.py @ 14:a234ed464674 draft default tip

Uploaded
author greg
date Sun, 10 Jan 2016 15:01:05 -0500
parents a5d7ed2680c3
children
comparison
equal deleted inserted replaced
13:a5d7ed2680c3 14:a234ed464674
41 parser.add_argument('--strand', dest='strand', help='Consider strandedness: reverse complement extracted sequence on reverse strand.') 41 parser.add_argument('--strand', dest='strand', help='Consider strandedness: reverse complement extracted sequence on reverse strand.')
42 args = parser.parse_args() 42 args = parser.parse_args()
43 43
44 fasta = Fasta(args.genome_file) 44 fasta = Fasta(args.genome_file)
45 45
46 dh = open('debug.log', 'wb')
47 for (input_filename, hid) in args.inputs: 46 for (input_filename, hid) in args.inputs:
48 extend_existing = args.extend_existing == 'existing' 47 extend_existing = args.extend_existing == 'existing'
49 consider_strand = args.strand == 'yes' 48 consider_strand = args.strand == 'yes'
50 49
51 reader = csv.reader(open(input_filename, 'rU'), delimiter='\t') 50 reader = csv.reader(open(input_filename, 'rU'), delimiter='\t')
52 fasta_output_path = get_output_path(hid, 51 fasta_output_path = get_output_path(hid,
53 args.subtract_from_start, 52 args.subtract_from_start,
54 args.add_to_end, 53 args.add_to_end,
55 extend_existing, 54 extend_existing,
56 consider_strand) 55 consider_strand)
57 dh.write('\n fasta_output_path: %s\n' % str(fasta_output_path))
58 output = open(fasta_output_path, 'wb') 56 output = open(fasta_output_path, 'wb')
59 gff_output_path = get_output_path(hid, 57 gff_output_path = get_output_path(hid,
60 args.subtract_from_start, 58 args.subtract_from_start,
61 args.add_to_end, 59 args.add_to_end,
62 extend_existing, 60 extend_existing,
63 consider_strand, 61 consider_strand,
64 orphan=True) 62 orphan=True)
65 dh.write('\n gff_output_path: %s\n' % str(gff_output_path))
66 orphan_writer = csv.writer(open(gff_output_path, 'wb'), delimiter='\t') 63 orphan_writer = csv.writer(open(gff_output_path, 'wb'), delimiter='\t')
67 64
68 for row in reader: 65 for row in reader:
69 dh.write('\n row: %s\n' % str(row))
70 if len(row) != 9 or row[0].startswith('#'): 66 if len(row) != 9 or row[0].startswith('#'):
71 continue 67 continue
72 try: 68 cname = row[0]
73 cname = row[0] 69 start = int(row[3])
74 start = int(row[3]) 70 end = int(row[4])
75 end = int(row[4]) 71 strand = row[6]
76 strand = row[6] 72 if extend_existing:
77 if extend_existing: 73 start -= args.subtract_from_start
78 start -= args.subtract_from_start 74 end += args.add_to_end
79 end += args.add_to_end 75 else:
80 else: 76 midpoint = (start + end) // 2
81 midpoint = (start + end) // 2 77 start = midpoint - args.subtract_from_start
82 start = midpoint - args.subtract_from_start 78 end = midpoint + args.add_to_end
83 end = midpoint + args.add_to_end 79 if 1 <= start and end <= len(fasta[cname]):
84 if 1 <= start and end <= len(fasta[cname]): 80 output.write('>%s:%s-%s_%s\n' % (cname, start, end, strand))
85 output.write('>%s:%s-%s_%s\n' % (cname, start, end, strand)) 81 bases = fasta[cname][start-1:end]
86 bases = fasta[cname][start-1:end] 82 if consider_strand and strand == '-':
87 if consider_strand and strand == '-': 83 bases = reverse_complement(bases)
88 bases = reverse_complement(bases) 84 output.write('%s\n' % bases)
89 dh.write('\n bases: %s\n' % str(bases)) 85 else:
90 output.write('%s\n' % bases) 86 orphan_writer.writerow(row)
91 else:
92 orphan_writer.writerow(row)
93 except Exception, e:
94 stop_err(str(e))
95 output.close() 87 output.close()
96 dh.close()