Mercurial > repos > greg > fasta_extract
comparison fasta_extract.py @ 14:a234ed464674 draft default tip
Uploaded
author | greg |
---|---|
date | Sun, 10 Jan 2016 15:01:05 -0500 |
parents | a5d7ed2680c3 |
children |
comparison
equal
deleted
inserted
replaced
13:a5d7ed2680c3 | 14:a234ed464674 |
---|---|
41 parser.add_argument('--strand', dest='strand', help='Consider strandedness: reverse complement extracted sequence on reverse strand.') | 41 parser.add_argument('--strand', dest='strand', help='Consider strandedness: reverse complement extracted sequence on reverse strand.') |
42 args = parser.parse_args() | 42 args = parser.parse_args() |
43 | 43 |
44 fasta = Fasta(args.genome_file) | 44 fasta = Fasta(args.genome_file) |
45 | 45 |
46 dh = open('debug.log', 'wb') | |
47 for (input_filename, hid) in args.inputs: | 46 for (input_filename, hid) in args.inputs: |
48 extend_existing = args.extend_existing == 'existing' | 47 extend_existing = args.extend_existing == 'existing' |
49 consider_strand = args.strand == 'yes' | 48 consider_strand = args.strand == 'yes' |
50 | 49 |
51 reader = csv.reader(open(input_filename, 'rU'), delimiter='\t') | 50 reader = csv.reader(open(input_filename, 'rU'), delimiter='\t') |
52 fasta_output_path = get_output_path(hid, | 51 fasta_output_path = get_output_path(hid, |
53 args.subtract_from_start, | 52 args.subtract_from_start, |
54 args.add_to_end, | 53 args.add_to_end, |
55 extend_existing, | 54 extend_existing, |
56 consider_strand) | 55 consider_strand) |
57 dh.write('\n fasta_output_path: %s\n' % str(fasta_output_path)) | |
58 output = open(fasta_output_path, 'wb') | 56 output = open(fasta_output_path, 'wb') |
59 gff_output_path = get_output_path(hid, | 57 gff_output_path = get_output_path(hid, |
60 args.subtract_from_start, | 58 args.subtract_from_start, |
61 args.add_to_end, | 59 args.add_to_end, |
62 extend_existing, | 60 extend_existing, |
63 consider_strand, | 61 consider_strand, |
64 orphan=True) | 62 orphan=True) |
65 dh.write('\n gff_output_path: %s\n' % str(gff_output_path)) | |
66 orphan_writer = csv.writer(open(gff_output_path, 'wb'), delimiter='\t') | 63 orphan_writer = csv.writer(open(gff_output_path, 'wb'), delimiter='\t') |
67 | 64 |
68 for row in reader: | 65 for row in reader: |
69 dh.write('\n row: %s\n' % str(row)) | |
70 if len(row) != 9 or row[0].startswith('#'): | 66 if len(row) != 9 or row[0].startswith('#'): |
71 continue | 67 continue |
72 try: | 68 cname = row[0] |
73 cname = row[0] | 69 start = int(row[3]) |
74 start = int(row[3]) | 70 end = int(row[4]) |
75 end = int(row[4]) | 71 strand = row[6] |
76 strand = row[6] | 72 if extend_existing: |
77 if extend_existing: | 73 start -= args.subtract_from_start |
78 start -= args.subtract_from_start | 74 end += args.add_to_end |
79 end += args.add_to_end | 75 else: |
80 else: | 76 midpoint = (start + end) // 2 |
81 midpoint = (start + end) // 2 | 77 start = midpoint - args.subtract_from_start |
82 start = midpoint - args.subtract_from_start | 78 end = midpoint + args.add_to_end |
83 end = midpoint + args.add_to_end | 79 if 1 <= start and end <= len(fasta[cname]): |
84 if 1 <= start and end <= len(fasta[cname]): | 80 output.write('>%s:%s-%s_%s\n' % (cname, start, end, strand)) |
85 output.write('>%s:%s-%s_%s\n' % (cname, start, end, strand)) | 81 bases = fasta[cname][start-1:end] |
86 bases = fasta[cname][start-1:end] | 82 if consider_strand and strand == '-': |
87 if consider_strand and strand == '-': | 83 bases = reverse_complement(bases) |
88 bases = reverse_complement(bases) | 84 output.write('%s\n' % bases) |
89 dh.write('\n bases: %s\n' % str(bases)) | 85 else: |
90 output.write('%s\n' % bases) | 86 orphan_writer.writerow(row) |
91 else: | |
92 orphan_writer.writerow(row) | |
93 except Exception, e: | |
94 stop_err(str(e)) | |
95 output.close() | 87 output.close() |
96 dh.close() |