diff spp/src/maqmap.c @ 6:ce08b0efa3fd draft

Uploaded
author zzhou
date Tue, 27 Nov 2012 16:11:40 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/maqmap.c	Tue Nov 27 16:11:40 2012 -0500
@@ -0,0 +1,164 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <unistd.h>
+#include "const.h"
+#include "maqmap.h"
+
+maqmap_t *maq_new_maqmap()
+{
+	maqmap_t *mm = (maqmap_t*)calloc(1, sizeof(maqmap_t));
+	mm->format = MAQMAP_FORMAT_NEW;
+	return mm;
+}
+void maq_delete_maqmap(maqmap_t *mm)
+{
+	int i;
+	if (mm == 0) return;
+	for (i = 0; i < mm->n_ref; ++i)
+		free(mm->ref_name[i]);
+	free(mm->ref_name);
+	free(mm->mapped_reads);
+	free(mm);
+}
+void maqmap_write_header(gzFile fp, const maqmap_t *mm)
+{
+	int i, len;
+	gzwrite(fp, &mm->format, sizeof(int));
+	gzwrite(fp, &mm->n_ref, sizeof(int));
+	for (i = 0; i != mm->n_ref; ++i) {
+		len = strlen(mm->ref_name[i]) + 1;
+		gzwrite(fp, &len, sizeof(int));
+		gzwrite(fp, mm->ref_name[i], len);
+	}
+	gzwrite(fp, &mm->n_mapped_reads, sizeof(bit64_t));
+}
+maqmap_t *maqmap_read_header(gzFile fp)
+{
+	maqmap_t *mm;
+	int k, len;
+	mm = maq_new_maqmap();
+	gzread(fp, &mm->format, sizeof(int));
+	if (mm->format != MAQMAP_FORMAT_NEW) {
+		if (mm->format > 0) {
+			fprintf(stderr, "** Obsolete map format is detected. Please use 'mapass2maq' command to convert the format.\n");
+			exit(3);
+		}
+		assert(mm->format == MAQMAP_FORMAT_NEW);
+	}
+	gzread(fp, &mm->n_ref, sizeof(int));
+	mm->ref_name = (char**)calloc(mm->n_ref, sizeof(char*));
+	for (k = 0; k != mm->n_ref; ++k) {
+		gzread(fp, &len, sizeof(int));
+		mm->ref_name[k] = (char*)malloc(len * sizeof(char));
+		gzread(fp, mm->ref_name[k], len);
+	}
+	/* read number of mapped reads */
+	gzread(fp, &mm->n_mapped_reads, sizeof(bit64_t));
+	return mm;
+}
+
+/* mapvalidate */
+
+static void mapvalidate_core(gzFile fpin)
+{
+	maqmap_t *m = maqmap_read_header(fpin);
+	maqmap1_t *m1, mm1;
+	bit64_t n = 0;
+	int i, l;
+	bit64_t *cnt;
+	m1 = &mm1;
+	cnt = (bit64_t*)calloc(m->n_ref, 8);
+	printf("[message] number of reference sequences: %d\n", m->n_ref);
+	while ((l = maqmap_read1(fpin, m1)) != 0) {
+		if (l != sizeof(maqmap1_t)) {
+			printf("[fatal error] truncated map file.\n");
+			break;
+		}
+		++n;
+		if ((int)m1->seqid >= m->n_ref) {
+			printf("[fatal error] maqmap1_t::seqid is invalid (%d >= %d).\n", m1->seqid, m->n_ref);
+			break;
+		}
+		++cnt[m1->seqid];
+		if (m1->size >= MAX_READLEN - 1) {
+			printf("[faltal error] maqmap1_t::size is invalid (%d >= %d).\n", m1->size, MAX_READLEN - 1);
+			break;
+		}
+	}
+	if (m->n_mapped_reads != 0) {
+		if (m->n_mapped_reads != n) {
+			printf("[warning] maqmap1_t::n_mapped_reads is set, but not equals the real number (%llu != %llu).\n",
+					m->n_mapped_reads, n);
+		}
+	}
+	for (i = 0; i != m->n_ref; ++i)
+		printf("[message] %s : %llu\n", m->ref_name[i], cnt[i]);
+	free(cnt);
+	maq_delete_maqmap(m);
+}
+
+/* mapview */
+
+static void mapview_core(FILE *fpout, gzFile fpin, int is_verbose, int is_mm)
+{
+	bit32_t j;
+	maqmap_t *m = maqmap_read_header(fpin);
+	maqmap1_t *m1, mm1;
+	m1 = &mm1;
+	while (maqmap_read1(fpin, m1)) {
+		fprintf(fpout, "%s\t%s\t%d\t%c\t%d\t%u\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d",
+				m1->name, m->ref_name[m1->seqid], (m1->pos>>1) + 1,
+				(m1->pos&1)? '-' : '+', m1->dist, m1->flag, m1->map_qual, (signed char)m1->seq[MAX_READLEN-1],
+				m1->alt_qual, m1->info1&0xf, m1->info2, m1->c[0], m1->c[1], m1->size);
+		if (is_verbose) {
+			fputc('\t', fpout);
+			for (j = 0; j != m1->size; ++j) {
+				if (m1->seq[j] == 0) fputc('n', fpout);
+				else if ((m1->seq[j]&0x3f) < 27) fputc("acgt"[m1->seq[j]>>6&3], fpout);
+				else fputc("ACGT"[m1->seq[j]>>6&3], fpout);
+			}
+			fputc('\t', fpout);
+			for (j = 0; j != m1->size; ++j)
+				fputc((m1->seq[j]&0x3f) + 33, fpout);
+		}
+		if (is_mm) {
+			bit64_t *p = (bit64_t*)(m1->seq + 55);
+			fprintf(fpout, "\t%llx", *p);
+		}
+		fputc('\n', fpout);
+	}
+	maq_delete_maqmap(m);
+}
+
+int ma_mapview(int argc, char *argv[])
+{
+	int c, is_verbose = 1, is_mm = 0;
+	while ((c = getopt(argc, argv, "bN")) >= 0) {
+		switch (c) {
+		case 'b': is_verbose = 0; break;
+		case 'N': is_mm = 1; break;
+		}
+	}
+	if (argc == optind) {
+		fprintf(stderr, "Usage: maq mapview [-bN] <in.map>\n");
+		return 1;
+	}
+	gzFile fp = (strcmp(argv[optind], "-") == 0)? gzdopen(STDIN_FILENO, "r") : gzopen(argv[optind], "r");
+	mapview_core(stdout, fp, is_verbose, is_mm);
+	gzclose(fp);
+	return 0;
+}
+
+int ma_mapvalidate(int argc, char *argv[])
+{
+	gzFile fp;
+	if (argc < 2) {
+		fprintf(stderr, "Usage: maq mapvalidate <in.map>\n");
+		return 1;
+	}
+	fp = (strcmp(argv[optind], "-") == 0)? gzdopen(STDIN_FILENO, "r") : gzopen(argv[1], "r");
+	mapvalidate_core(fp);
+	gzclose(fp);
+	return 0;
+}