Mercurial > repos > greg > fasta_extract
comparison fasta_extract.py @ 12:da6ab598f025 draft
Uploaded
author | greg |
---|---|
date | Sun, 10 Jan 2016 14:53:59 -0500 |
parents | 48f6e9f1c19d |
children | a5d7ed2680c3 |
comparison
equal
deleted
inserted
replaced
11:48f6e9f1c19d | 12:da6ab598f025 |
---|---|
22 format = 'gff' | 22 format = 'gff' |
23 output_dir = 'output_orphan_dir' | 23 output_dir = 'output_orphan_dir' |
24 else: | 24 else: |
25 format = 'fasta' | 25 format = 'fasta' |
26 output_dir = 'output_dir' | 26 output_dir = 'output_dir' |
27 return os.path.join(output_dir, '%s_on_data_%d.%s' % (attrs, hid, format)) | 27 return os.path.join(output_dir, '%s_on_data_%s.%s' % (attrs, hid, format)) |
28 | 28 |
29 | 29 |
30 def stop_err(msg): | 30 def stop_err(msg): |
31 sys.stderr.write(msg) | 31 sys.stderr.write(msg) |
32 sys.exit(1) | 32 sys.exit(1) |
41 parser.add_argument('--strand', dest='strand', help='Consider strandedness: reverse complement extracted sequence on reverse strand.') | 41 parser.add_argument('--strand', dest='strand', help='Consider strandedness: reverse complement extracted sequence on reverse strand.') |
42 args = parser.parse_args() | 42 args = parser.parse_args() |
43 | 43 |
44 fasta = Fasta(args.genome_file) | 44 fasta = Fasta(args.genome_file) |
45 | 45 |
46 dh = open('debug.log', 'wb') | |
46 for (input_filename, hid) in args.inputs: | 47 for (input_filename, hid) in args.inputs: |
47 hid = int(hid) | |
48 extend_existing = args.extend_existing == 'existing' | 48 extend_existing = args.extend_existing == 'existing' |
49 consider_strand = args.strand == 'yes' | 49 consider_strand = args.strand == 'yes' |
50 | |
50 reader = csv.reader(open(input_filename, 'rU'), delimiter='\t') | 51 reader = csv.reader(open(input_filename, 'rU'), delimiter='\t') |
51 fasta_output_path = get_output_path(hid, | 52 fasta_output_path = get_output_path(hid, |
52 args.subtract_from_start, | 53 args.subtract_from_start, |
53 args.add_to_end, | 54 args.add_to_end, |
54 extend_existing, | 55 extend_existing, |
55 consider_strand) | 56 consider_strand) |
57 dh.write('\n fasta_output_path: %s\n' % str(fasta_output_path)) | |
56 output = open(fasta_output_path, 'wb') | 58 output = open(fasta_output_path, 'wb') |
57 gff_output_path = get_output_path(hid, | 59 gff_output_path = get_output_path(hid, |
58 args.subtract_from_start, | 60 args.subtract_from_start, |
59 args.add_to_end, | 61 args.add_to_end, |
60 extend_existing, | 62 extend_existing, |
61 consider_strand, | 63 consider_strand, |
62 orphan=True) | 64 orphan=True) |
65 dh.write('\n gff_output_path: %s\n' % str(gff_output_path)) | |
63 orphan_writer = csv.writer(open(gff_output_path, 'wb'), delimiter='\t') | 66 orphan_writer = csv.writer(open(gff_output_path, 'wb'), delimiter='\t') |
67 | |
64 for row in reader: | 68 for row in reader: |
69 dh.write('\n row: %s\n' % str(row)) | |
65 if len(row) != 9 or row[0].startswith('#'): | 70 if len(row) != 9 or row[0].startswith('#'): |
66 continue | 71 continue |
67 try: | 72 try: |
68 cname = row[0] | 73 cname = row[0] |
69 start = int(row[3]) | 74 start = int(row[3]) |
79 if 1 <= start and end <= len(fasta[cname]): | 84 if 1 <= start and end <= len(fasta[cname]): |
80 output.write('>%s:%s-%s_%s\n' % (cname, start, end, strand)) | 85 output.write('>%s:%s-%s_%s\n' % (cname, start, end, strand)) |
81 bases = fasta[cname][start-1:end] | 86 bases = fasta[cname][start-1:end] |
82 if consider_strand and strand == '-': | 87 if consider_strand and strand == '-': |
83 bases = reverse_complement(bases) | 88 bases = reverse_complement(bases) |
89 dh.write('\n bases: %s\n' % str(bases)) | |
84 output.write('%s\n' % bases) | 90 output.write('%s\n' % bases) |
85 else: | 91 else: |
86 orphan_writer.writerow(row) | 92 orphan_writer.writerow(row) |
87 except Exception, e: | 93 except Exception, e: |
88 stop_err(str(e)) | 94 stop_err(str(e)) |
89 finally: | 95 finally: |
90 output.close() | 96 output.close() |
97 dh.close() |