Logo Search packages:      
Sourcecode: maqview version File versions  Download package

maqmap_index.c

#include "maqmap_index.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

00006 struct CallbackObj {
      MapIndex *mi;
      maqmap_t *mm;
      BTree *tree, *last_node;
      int last_ref_id, layer;
      int64_t index;
      unsigned char buf[1024]; // I never expect ref_name[x] exceed this length
      int buf_size;
      int heading;
      int count;
      int n;
};

#define read_buf(target, size) if(obj->buf_size >= size){ \
                  memcpy(target, obj->buf, size);\
                  obj->buf_size -= size;\
            } else if(obj->buf_size){\
                  if(obj->buf_size + buf_size >= size){\
                        memcpy(target, obj->buf, obj->buf_size);\
                        memcpy(target + obj->buf_size, buf, size - obj->buf_size);\
                        buf += size - obj->buf_size;\
                        buf_size -= size - obj->buf_size;\
                        obj->buf_size = 0;\
                  } else {\
                        memcpy(obj->buf + obj->buf_size, buf, buf_size);\
                        obj->buf_size += buf_size;\
                        return;\
                  }\
            } else if(buf_size < size){\
                  memcpy(obj->buf, buf, buf_size);\
                  obj->buf_size = buf_size;\
                  return;\
            } else {\
                  memcpy(target, buf, size);\
                  buf += size;\
                  buf_size -= size;\
            }
void notify(void *object, unsigned char *buf, int buf_size, int64_t pos){
      struct CallbackObj *obj;
      maplet *let;
      int i;
      obj = (struct CallbackObj*)object;
      while(obj->heading){
            switch(obj->heading){
                  case 1:
                        obj->mi->head_size = 0;
                        read_buf(&obj->mm->format, (int)sizeof(int));
                        if (obj->mm->format != MAQMAP_FORMAT_NEW) {
                              if (obj->mm->format > 0) {
                                    fprintf(stderr, "** Obsolete map format is detected (%d). Please use 'mapass2maq' command to convert the format.\n", obj->mm->format);
                                    exit(3);
                              }
                        }
                        obj->mi->head_size += sizeof(int);
                        obj->heading = 2;
                  case 2:
                        read_buf(&obj->mm->n_ref, (int)sizeof(int));
                        obj->mm->ref_name = (char**)calloc(obj->mm->n_ref, sizeof(char*));
                        obj->mi->trees = (BTree**)malloc(sizeof(BTree*) * obj->mm->n_ref);
                        for(i=0;i<obj->mm->n_ref;i++) obj->mi->trees[i] = NULL;
                        obj->count = 0;
                        obj->mi->head_size += sizeof(int);
                        obj->heading = 3;
                  case 3:
                        if(obj->count == obj->mm->n_ref){
                              obj->heading = 5;
                              break;
                        }
                        read_buf(&obj->n, (int)sizeof(int));
                        obj->mi->head_size += sizeof(int);
                        obj->mm->ref_name[obj->count] = (char*)malloc(obj->n + 1);
                        memset(obj->mm->ref_name[obj->count], 0, obj->n+1);
                        obj->heading = 4;
                  case 4:
                        read_buf(obj->mm->ref_name[obj->count], obj->n);
                        obj->mm->ref_name[obj->count][obj->n] = 0;
                        obj->count ++;
                        obj->mi->head_size += obj->n;
                        obj->heading = 3;
                        break;
                  case 5:
                        read_buf(&obj->mm->n_mapped_reads, (int)sizeof(bit64_t));
                        obj->mi->head_size += sizeof(bit64_t);
                        obj->heading = 0;
            }
      }
      while(buf_size + obj->buf_size >= (int)sizeof(maplet)){
            if(obj->buf_size){
                  memcpy(obj->buf + obj->buf_size, buf, sizeof(maplet) - obj->buf_size);
                  buf += sizeof(maplet) - obj->buf_size;
                  buf_size -= sizeof(maplet) - obj->buf_size;
                  obj->buf_size = 0;
                  let = (maplet*)obj->buf;
            } else {
                  let = (maplet*)buf;
                  buf += sizeof(maplet);
                  buf_size -= sizeof(maplet);
            }
            if((int)let->seqid != obj->last_ref_id){
                  if(obj->tree != NULL){
                        obj->mi->trees[obj->last_ref_id] = obj->tree;
                  }
                  obj->tree = btree_init();
                  obj->last_node = obj->tree;
                  obj->last_node->left = obj->last_node->right = (int64_t)read_pos(let->pos);
                  obj->last_node->indexs[0] = obj->last_node->indexs[1] = obj->index;
                  obj->layer = 1;
                  obj->last_ref_id = let->seqid;
            } else {
                  obj->last_node = btree_append(&obj->tree, obj->last_node, &obj->layer, obj->index, (int64_t)read_pos(let->pos));
            }
            obj->index ++;
      }
      if(buf_size){
            memcpy(obj->buf + obj->buf_size, buf, buf_size);
            obj->buf_size += buf_size;
      }
}

maqmap_t *maq_new_maqmap()
{
      maqmap_t *mm = (maqmap_t*)calloc(1, sizeof(maqmap_t));
      mm->format = MAQMAP_FORMAT_NEW;
      return mm;
}

MapIndex * make_map_index(const char *filename, int level){
      MapIndex *mi;
      maqmap_t *mm;
      struct CallbackObj obj;
      zr_stream *zr;
      char *index_name;
      gzFile file;
      int i;

//    if((zr = zropen(filename)) == NULL) return NULL;

      mm = maq_new_maqmap();
      mi = (MapIndex*)malloc(sizeof(MapIndex));
      mi->mm = mm;
//    mi->trees = (BTree**)malloc(sizeof(BTree*) * mm->n_ref);
//    for(i=0;i<mm->n_ref;i++) mi->trees[i] = NULL;
//    zrclose(zr);
      obj.mi = mi;
      obj.mm = mm;
      obj.tree  = NULL;
      obj.last_node = NULL;
      obj.n = 0;
      obj.count = 0;
      obj.buf_size = 0;
      obj.heading = 1;
      obj.index = 0;
      obj.layer = 1;
      obj.last_ref_id = -1;
      zrmkindex(filename, level, notify, &obj);
      if(obj.tree != NULL) obj.mi->trees[obj.last_ref_id] = obj.tree;
      if((zr = zropen(filename)) == NULL) return NULL;
      mi->stream = zr;
      index_name = (char*)malloc(strlen(filename) + 4);
      strcpy(index_name, filename);
      strcat(index_name, ".vm");
      if((file = gzopen(index_name, "w")) == NULL){ free(index_name); close_map_index(mi); return NULL; }
      free(index_name);
      gzwrite(file, &mi->head_size, sizeof(int));
      gzwrite(file, &(mm->n_ref), sizeof(int));
      for(i=0;i<mm->n_ref;i++){ btree_dump_gz(file, mi->trees[i]); }
      gzclose(file);
      return mi;
}

MapIndex * make_map_index_old(const char *filename){
      MapIndex *mi;
      maqmap_t *mm;
      maplet let;
      BTree *tree, *last_node;
      zr_stream *zr;
      char *index_name;
      gzFile file;
      int i, last_ref_id, layer, head_size;
      int64_t index;
      if((zr = zropen(filename)) == NULL) return NULL;
      index_name = (char*)malloc(strlen(filename) + 4);
      strcpy(index_name, filename);
      strcat(index_name, ".vm");
      mm = maqmap_read_header(zr);
      mi = (MapIndex*)malloc(sizeof(MapIndex));
      mi->mm = mm;
      head_size = sizeof(int) * 2 + sizeof(bit64_t);
      for(i=0;i<mm->n_ref;i++){
            head_size += sizeof(int) + strlen(mm->ref_name[i]) + 1;
      }
      mi->head_size = head_size;
      mi->trees = (BTree**)malloc(sizeof(BTree*) * mm->n_ref);
      for(i=0;i<mm->n_ref;i++) mi->trees[i] = NULL;
      mi->stream = zr;
      index = 0;
      layer = 1;
      last_node = NULL;
      last_ref_id = -1;
      tree = NULL;
      while(zrread(&let, sizeof(maplet), zr) == sizeof(maplet)){
            if((int)let.seqid != last_ref_id){
                  if(tree != NULL){
                        mi->trees[last_ref_id] = tree;
                  }
                  tree = btree_init();
                  last_node = tree;
                  last_node->left = last_node->right = (int64_t)read_pos(let.pos);
                  last_node->indexs[0] = last_node->indexs[1] = index;
                  layer = 1;
                  last_ref_id = let.seqid;
            } else {
                  last_node = btree_append(&tree, last_node, &layer, index, (int64_t)read_pos(let.pos));
            }
            index ++;
      }
      if(tree != NULL) mi->trees[last_ref_id] = tree;
      if((file = gzopen(index_name, "w")) == NULL){ free(index_name); close_map_index(mi); return NULL; }
      free(index_name);
      gzwrite(file, &head_size, sizeof(int));
      gzwrite(file, &(mm->n_ref), sizeof(int));
      for(i=0;i<mm->n_ref;i++) btree_dump_gz(file, mi->trees[i]);
      gzclose(file);
      return mi;
}

void maqmap_write_header(gzFile fp, const maqmap_t *mm)
{
      int i, len;
      gzwrite(fp, &mm->format, sizeof(int));
      gzwrite(fp, &mm->n_ref, sizeof(int));
      for (i = 0; i != mm->n_ref; ++i) {
            len = strlen(mm->ref_name[i]) + 1;
            gzwrite(fp, &len, sizeof(int));
            gzwrite(fp, mm->ref_name[i], len);
      }
      gzwrite(fp, &mm->n_mapped_reads, sizeof(bit64_t));
}

maqmap_t *maqmap_read_header(zr_stream *fp)
{
      maqmap_t *mm;
      int k, len;
      mm = maq_new_maqmap();
      zrread(&mm->format, sizeof(int), fp);
      if (mm->format != MAQMAP_FORMAT_NEW) {
            if (mm->format > 0) {
                  fprintf(stderr, "** Obsolete map format is detected. Please use 'mapass2maq' command to convert the format.\n");
                  exit(3);
            }
      }
      zrread(&mm->n_ref, sizeof(int), fp);
      mm->ref_name = (char**)calloc(mm->n_ref, sizeof(char*));
      for (k = 0; k != mm->n_ref; ++k) {
            zrread(&len, sizeof(int), fp);
            mm->ref_name[k] = (char*)malloc(len * sizeof(char) + 1);
            zrread(mm->ref_name[k], (size_t)len, fp);
            mm->ref_name[k][len] = 0;
      }
      /* read number of mapped reads */
      zrread(&mm->n_mapped_reads, sizeof(bit64_t), fp);
      return mm;
}

MapIndex* load_map_index(const char *filename, int auto_mk){
      zr_stream *zr;
      MapIndex *mi;
      maqmap_t *mm;
      char *index_name;
      gzFile file;
      int i, n;
      if((zr = zropen(filename)) == NULL) return NULL;
      index_name = (char*)malloc(strlen(filename) + 4);
      strcpy(index_name, filename);
      strcat(index_name, ".vm");
      if((file = gzopen(index_name, "r")) == NULL){
            zrclose(zr);
            free(index_name);
            if(auto_mk){
                  return make_map_index(filename, 1024);
            } else {
                  return NULL;
            }
      }
      free(index_name);
      mm = maqmap_read_header(zr);
      mi = (MapIndex*)malloc(sizeof(MapIndex));
      mi->mm = mm;
      mi->trees = (BTree**)malloc(sizeof(BTree*) * mm->n_ref);
      for(i=0;i<mm->n_ref;i++) mi->trees[i] = NULL;
      mi->stream = zr;
      gzread(file, &(mi->head_size), sizeof(int));
      gzread(file,&n, sizeof(int));
      if(n != mm->n_ref){
            close_map_index(mi);
            gzclose(file);
      }
      for(i=0;i<mm->n_ref;i++){
            mi->trees[i] = btree_load_gz(file);
      }
      gzclose(file);
      return mi;
}

void close_map_index(MapIndex *mi){
      int i;
      if(mi->stream) zrclose(mi->stream);
      if(mi->mm){
            if(mi->trees){
                  for(i=0;i<mi->mm->n_ref;i++){
                        btree_free(mi->trees[i]);
                  }
                  free(mi->trees);
            }
            if(mi->mm->ref_name){
                  for(i=0;i<mi->mm->n_ref;i++) free(mi->mm->ref_name[i]);
                  free(mi->mm->ref_name);
            }
            if(mi->mm->mapped_reads) free(mi->mm->mapped_reads);
            free(mi->mm);
      }
      free(mi);
}

int iter_map_index(MapIndex *mi, int ref_id, int64_t start, int64_t end, int (*handle)(void *obj, maplet *let), void *obj){
      BTree *tree;
      int64_t offset;
      int64_t left, right, i, l, r;
      maplet let;
      tree = mi->trees[ref_id];
      btree_find(tree, start, &l, &r);
      left = l;
      btree_find(tree, end, &l, &r);
      right = r;
      if(left > right) return 0;
      offset = mi->head_size + sizeof(maplet) * left;
      if(zrseek(mi->stream, offset) < 0){
            return -1;
      }
      l = 0;
      for(i=left;i<=right;i++){
            offset = sizeof(maplet);
            while(offset){
                  r = zrread((void*)(&let) + sizeof(maplet) - offset, offset, mi->stream);
                  if(r <= 0){
                        fprintf(stderr, "Read_map_index Error: %s:%d\n", __FILE__, __LINE__);
                        exit(1);
                  } else {
                        offset -= r;
                  }
            }
            if(read_pos(let.pos) < start || read_pos(let.pos) > end) continue;
            if(handle && !handle(obj, &let)) break;
            l ++;
      }
      return l;
}

maplet* read_map_index(MapIndex *mi, int ref_id, int64_t start, int64_t end, int *n_let){
      BTree *tree;
      int64_t left, right, offset;
      int64_t l, r;
      maplet *let;
      tree = mi->trees[ref_id];
      btree_find(tree, start, &l, &r);
      left = l;
      btree_find(tree, end, &l, &r);
      right = r;
      if(left > right){ return NULL; *n_let = -1; }
      offset = mi->head_size + sizeof(maplet) * left;
      /*
      if(zrseek(mi->stream, offset) < 0){
            *n_let=-1; return NULL;
      }
      l = right - left + 1;
      let = (maplet*)malloc(sizeof(maplet) * l);
      offset = zrread((void*)let, sizeof(maplet) * l, mi->stream);
      */
      l = right - left + 1;
      let = (maplet*)malloc(sizeof(maplet) * l);
      offset = zrgets((void*)let, sizeof(maplet) * l, offset, mi->stream);
      if(offset != l * sizeof(maplet)){
            fprintf(stderr, "Read_map_index Error: %s:%d\n", __FILE__, __LINE__);
            *n_let = 0;
            return NULL;
      }
      *n_let = l;
      return let;
}

maplet* read_map_next_to(MapIndex *mi, int ref_id, int64_t pos, int *n_let){
      maplet *let;
      int size, capacity;
      capacity = 64;
      size = 0;
      let = (maplet*)malloc(sizeof(maplet) * capacity);
      while(zrread(let + size, sizeof(maplet), mi->stream) == sizeof(maplet)){
            if(let[size].seqid != ref_id) break;
            if(read_pos(let[size].pos) > pos) break;
            size ++;
            if(size == capacity){
                  capacity += 64;
                  let = (maplet*)realloc(let, sizeof(maplet) * capacity);
            }
      }
      *n_let = size;
      return let;
}

void num2str(int64_t number, char *string){
      int n, m;
      char *str;
      sprintf(string, "%ld", number);
      n = strlen(string);
      m = ((n - 1) / 3);
      string[n + m] = '\0';
      str = string + n - 3;
      while(m){
            memmove(str + m, str, 3);
            str[m-1] = ',';
            m --;
            str -= 3;
      }
}

int64_t str2num(char *string){
      int i, j, n;
      int64_t rs;
      char *str;
      rs = 0;
      n = strlen(string);
      str = (char*)string;
      i = 0;
      while(n){
            if(*(str + i) == ','){
                  str[i] = '\0';
                  j = i;
                  while(j--)rs *= 10;
                  rs += atol(str);
                  str[i] = ',';
                  str += i + 1;
                  i = 0;
            } else {
                  i ++;
            }
            n --;
      }
      if(i){
            j = i;
            while(j--)rs *= 10;
            rs += atol(str);
      }
      return rs;
}

Generated by  Doxygen 1.6.0   Back to index