annotate spp/src/maqmap.c @ 6:ce08b0efa3fd draft

Uploaded
author zzhou
date Tue, 27 Nov 2012 16:11:40 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
1 #include <stdio.h>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
2 #include <stdlib.h>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
3 #include <assert.h>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
4 #include <unistd.h>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
5 #include "const.h"
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
6 #include "maqmap.h"
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
7
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
8 maqmap_t *maq_new_maqmap()
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
9 {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
10 maqmap_t *mm = (maqmap_t*)calloc(1, sizeof(maqmap_t));
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
11 mm->format = MAQMAP_FORMAT_NEW;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
12 return mm;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
13 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
14 void maq_delete_maqmap(maqmap_t *mm)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
15 {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
16 int i;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
17 if (mm == 0) return;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
18 for (i = 0; i < mm->n_ref; ++i)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
19 free(mm->ref_name[i]);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
20 free(mm->ref_name);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
21 free(mm->mapped_reads);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
22 free(mm);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
23 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
24 void maqmap_write_header(gzFile fp, const maqmap_t *mm)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
25 {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
26 int i, len;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
27 gzwrite(fp, &mm->format, sizeof(int));
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
28 gzwrite(fp, &mm->n_ref, sizeof(int));
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
29 for (i = 0; i != mm->n_ref; ++i) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
30 len = strlen(mm->ref_name[i]) + 1;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
31 gzwrite(fp, &len, sizeof(int));
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
32 gzwrite(fp, mm->ref_name[i], len);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
33 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
34 gzwrite(fp, &mm->n_mapped_reads, sizeof(bit64_t));
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
35 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
36 maqmap_t *maqmap_read_header(gzFile fp)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
37 {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
38 maqmap_t *mm;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
39 int k, len;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
40 mm = maq_new_maqmap();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
41 gzread(fp, &mm->format, sizeof(int));
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
42 if (mm->format != MAQMAP_FORMAT_NEW) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
43 if (mm->format > 0) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
44 fprintf(stderr, "** Obsolete map format is detected. Please use 'mapass2maq' command to convert the format.\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
45 exit(3);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
46 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
47 assert(mm->format == MAQMAP_FORMAT_NEW);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
48 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
49 gzread(fp, &mm->n_ref, sizeof(int));
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
50 mm->ref_name = (char**)calloc(mm->n_ref, sizeof(char*));
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
51 for (k = 0; k != mm->n_ref; ++k) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
52 gzread(fp, &len, sizeof(int));
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
53 mm->ref_name[k] = (char*)malloc(len * sizeof(char));
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
54 gzread(fp, mm->ref_name[k], len);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
55 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
56 /* read number of mapped reads */
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
57 gzread(fp, &mm->n_mapped_reads, sizeof(bit64_t));
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
58 return mm;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
59 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
60
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
61 /* mapvalidate */
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
62
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
63 static void mapvalidate_core(gzFile fpin)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
64 {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
65 maqmap_t *m = maqmap_read_header(fpin);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
66 maqmap1_t *m1, mm1;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
67 bit64_t n = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
68 int i, l;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
69 bit64_t *cnt;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
70 m1 = &mm1;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
71 cnt = (bit64_t*)calloc(m->n_ref, 8);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
72 printf("[message] number of reference sequences: %d\n", m->n_ref);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
73 while ((l = maqmap_read1(fpin, m1)) != 0) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
74 if (l != sizeof(maqmap1_t)) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
75 printf("[fatal error] truncated map file.\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
76 break;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
77 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
78 ++n;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
79 if ((int)m1->seqid >= m->n_ref) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
80 printf("[fatal error] maqmap1_t::seqid is invalid (%d >= %d).\n", m1->seqid, m->n_ref);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
81 break;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
82 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
83 ++cnt[m1->seqid];
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
84 if (m1->size >= MAX_READLEN - 1) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
85 printf("[faltal error] maqmap1_t::size is invalid (%d >= %d).\n", m1->size, MAX_READLEN - 1);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
86 break;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
87 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
88 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
89 if (m->n_mapped_reads != 0) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
90 if (m->n_mapped_reads != n) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
91 printf("[warning] maqmap1_t::n_mapped_reads is set, but not equals the real number (%llu != %llu).\n",
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
92 m->n_mapped_reads, n);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
93 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
94 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
95 for (i = 0; i != m->n_ref; ++i)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
96 printf("[message] %s : %llu\n", m->ref_name[i], cnt[i]);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
97 free(cnt);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
98 maq_delete_maqmap(m);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
99 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
100
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
101 /* mapview */
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
102
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
103 static void mapview_core(FILE *fpout, gzFile fpin, int is_verbose, int is_mm)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
104 {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
105 bit32_t j;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
106 maqmap_t *m = maqmap_read_header(fpin);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
107 maqmap1_t *m1, mm1;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
108 m1 = &mm1;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
109 while (maqmap_read1(fpin, m1)) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
110 fprintf(fpout, "%s\t%s\t%d\t%c\t%d\t%u\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d",
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
111 m1->name, m->ref_name[m1->seqid], (m1->pos>>1) + 1,
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
112 (m1->pos&1)? '-' : '+', m1->dist, m1->flag, m1->map_qual, (signed char)m1->seq[MAX_READLEN-1],
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
113 m1->alt_qual, m1->info1&0xf, m1->info2, m1->c[0], m1->c[1], m1->size);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
114 if (is_verbose) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
115 fputc('\t', fpout);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
116 for (j = 0; j != m1->size; ++j) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
117 if (m1->seq[j] == 0) fputc('n', fpout);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
118 else if ((m1->seq[j]&0x3f) < 27) fputc("acgt"[m1->seq[j]>>6&3], fpout);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
119 else fputc("ACGT"[m1->seq[j]>>6&3], fpout);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
120 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
121 fputc('\t', fpout);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
122 for (j = 0; j != m1->size; ++j)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
123 fputc((m1->seq[j]&0x3f) + 33, fpout);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
124 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
125 if (is_mm) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
126 bit64_t *p = (bit64_t*)(m1->seq + 55);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
127 fprintf(fpout, "\t%llx", *p);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
128 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
129 fputc('\n', fpout);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
130 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
131 maq_delete_maqmap(m);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
132 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
133
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
134 int ma_mapview(int argc, char *argv[])
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
135 {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
136 int c, is_verbose = 1, is_mm = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
137 while ((c = getopt(argc, argv, "bN")) >= 0) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
138 switch (c) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
139 case 'b': is_verbose = 0; break;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
140 case 'N': is_mm = 1; break;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
141 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
142 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
143 if (argc == optind) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
144 fprintf(stderr, "Usage: maq mapview [-bN] <in.map>\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
145 return 1;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
146 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
147 gzFile fp = (strcmp(argv[optind], "-") == 0)? gzdopen(STDIN_FILENO, "r") : gzopen(argv[optind], "r");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
148 mapview_core(stdout, fp, is_verbose, is_mm);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
149 gzclose(fp);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
150 return 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
151 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
152
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
153 int ma_mapvalidate(int argc, char *argv[])
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
154 {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
155 gzFile fp;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
156 if (argc < 2) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
157 fprintf(stderr, "Usage: maq mapvalidate <in.map>\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
158 return 1;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
159 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
160 fp = (strcmp(argv[optind], "-") == 0)? gzdopen(STDIN_FILENO, "r") : gzopen(argv[1], "r");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
161 mapvalidate_core(fp);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
162 gzclose(fp);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
163 return 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
164 }