Logo Search packages:      
Sourcecode: maqview version File versions  Download package

cns_cache.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "caches.h"
#include "stdhashc.h"

00007 struct CNSObj {
      CNSCache *cache;
      unsigned char buf[1024];
      int buf_size;
      int64_t skips;
      int heading;
      int n;
      int count;
};

#define old_read_buf(target, size) if(obj->buf_size >= size){ \
                  memcpy(target, obj->buf, size);\
                  obj->buf_size -= size;\
            } else if(obj->buf_size){\
                  if(obj->buf_size + buf_size >= size){\
                        memcpy(target, obj->buf, obj->buf_size);\
                        memcpy(target + obj->buf_size, buf, size - obj->buf_size);\
                        buf += size - obj->buf_size;\
                        buf_size -= size - obj->buf_size;\
                        obj->buf_size = 0;\
                  } else {\
                        memcpy(obj->buf + obj->buf_size, buf, buf_size);\
                        obj->buf_size += buf_size;\
                        return;\
                  }\
            } else if(buf_size < size){\
                  if(buf_size) memcpy(obj->buf, buf, buf_size);\
                  obj->buf_size = buf_size;\
                  return;\
            } else {\
                  memcpy(target, buf, size);\
                  buf += size;\
                  buf_size -= size;\
            }

static int read_buf(struct CNSObj *obj, void *target, unsigned char **buf_ptr, int *buf_size_ptr, int size){
            if(obj->buf_size >= size){
                  memcpy(target, obj->buf, size);
                  obj->buf_size -= size;
            } else if(obj->buf_size){
                  if(obj->buf_size + *buf_size_ptr >= size){
                        memcpy(target, obj->buf, obj->buf_size);
                        memcpy(target + obj->buf_size, *buf_ptr, size - obj->buf_size);
                        *buf_ptr += size - obj->buf_size;
                        *buf_size_ptr -= size - obj->buf_size;
                        obj->buf_size = 0;
                  } else {
                        memcpy(obj->buf + obj->buf_size, *buf_ptr, *buf_size_ptr);
                        obj->buf_size += *buf_size_ptr;
                        return 0;
                  }
            } else if(*buf_size_ptr < size){
                  if(*buf_size_ptr) memcpy(obj->buf, *buf_ptr, *buf_size_ptr);
                  obj->buf_size = *buf_size_ptr;
                  return 0;
            } else {
                  memcpy(target, *buf_ptr, size);
                  *buf_ptr += size;
                  *buf_size_ptr -= size;
            }
            return 1;
}

void cns_notify(void *object, unsigned char *buf, int buf_size, int64_t pos){
      struct CNSObj *obj;
      obj = (struct CNSObj*)object;
      AGAIN:
      if(!obj->heading){
            if(buf_size > obj->skips){
                  buf_size -= obj->skips;
                  buf += obj->skips;
                  obj->skips = 0;
                  obj->heading = 1;
                  obj->cache->ref_id = obj->cache->n_ref;
                  obj->cache->n_ref ++;
                  obj->cache->ref_name = (char**)realloc(obj->cache->ref_name, sizeof(char*) * obj->cache->n_ref);
                  obj->cache->ref_offsets = (int64_t*)realloc(obj->cache->ref_offsets, sizeof(int64_t) * obj->cache->n_ref);
                  obj->cache->ref_lengths = (int64_t*)realloc(obj->cache->ref_lengths, sizeof(int64_t) * obj->cache->n_ref);
                  obj->cache->ref_states = (int*)realloc(obj->cache->ref_states, sizeof(int) * obj->cache->n_ref);
                  obj->cache->last_ref_pos = pos - buf_size;
            } else {
                  obj->skips -= buf_size;
                  return;
            }
      }
      switch(obj->heading){
            case 1:
                  if(!read_buf(obj, &obj->n, &buf, &buf_size, sizeof(int))) return;
                  obj->cache->ref_offsets[obj->cache->ref_id] = obj->cache->last_ref_pos + sizeof(int) + obj->n + sizeof(int);
                  obj->cache->ref_name[obj->cache->ref_id] = (char*)malloc(obj->n + 1);
                  obj->heading = 2;
            case 2:
                  if(!read_buf(obj, obj->cache->ref_name[obj->cache->ref_id], &buf, &buf_size, obj->n)) return;
                  obj->cache->ref_name[obj->cache->ref_id][obj->n] = 0;
                  obj->heading = 3;
            case 3:
                  if(!read_buf(obj, &obj->skips, &buf, &buf_size, sizeof(int))) return;
                  obj->cache->ref_lengths[obj->cache->ref_id] = obj->skips;
                  obj->cache->ref_states[obj->cache->ref_id] = 1;
                  obj->skips *= sizeof(cns_t);
                  obj->heading = 0;
                  if(obj->skips <= obj->buf_size){
                        obj->buf_size -= obj->skips;
                        obj->skips = 0;
                        goto AGAIN;
                  }
                  obj->skips -= obj->buf_size;
                  obj->buf_size = 0;
                  if(obj->skips < buf_size){
                        buf_size -= obj->skips;
                        buf += obj->skips;
                        obj->skips = 0;
                        goto AGAIN;
                  }
                  obj->skips -= buf_size;
                  return;
      }
}

CNSCache* make_cns_index(const char *filename, int level){
      CNSCache *cache;
      struct CNSObj obj;
      char *index_name;
      FILE *file;
      int i;
      cache = cns_cache_init();
      if((cache->stream = zropen(filename)) == NULL){
            close_cns_cache(cache);
            return NULL;
      }
      obj.cache = cache;
      obj.heading = 0;
      obj.skips  = 0;
      obj.buf_size = 0;
      zrmkindex(filename, level, cns_notify, &obj);
      //cache->n_ref --;
      index_name = (char*)malloc(strlen(filename) + 9);
      strcpy(index_name, filename);
      strcat(index_name, ".summary");
      if((file = fopen(index_name, "w")) == NULL){ free(index_name); close_cns_cache(cache); return NULL; }
      fprintf(file, "N_REF: %d\n", cache->n_ref);
      for(i=0;i<cache->n_ref;i++){
            fprintf(file, "REF_NAME:%s\n", cache->ref_name[i]);
            fprintf(file, "REF_LENGTH:%ld\n", cache->ref_lengths[i]);
            fprintf(file, "REF_OFFSET:%ld\n", cache->ref_offsets[i]);
      }
      fclose(file);
      return cache;
}

void close_cns_cache(CNSCache * cache){
      int i;
      if(cache == NULL) return;
      for(i=0;i<cache->n_ref;i++){
            free(cache->ref_name[i]);
      }
      if(cache->ref_name) free(cache->ref_name);
      if(cache->ref_lengths) free(cache->ref_lengths);
      if(cache->ref_offsets) free(cache->ref_offsets);
      if(cache->ref_states) free(cache->ref_states);
      if (cache->mm_map) free(cache->mm_map);
      if (cache->cns_map) free(cache->cns_map);
      free(cache->seqs);
      if(cache->stream) zrclose(cache->stream);
      free(cache);
}

CNSCache *cns_cache_init(){
      CNSCache *cns;
      cns = (CNSCache*)calloc(1, sizeof(CNSCache));
      cns->ref_id = -1;
      cns->capacity = 512;
      cns->seqs = (cns_t*)malloc(sizeof(cns_t) * cns->capacity);
      return cns;
}

CNSCache* open_cns_cache(const char *filename){
      CNSCache *cns;
      char *index_name;
      FILE *file;
      int i;
      index_name = (char*)malloc(strlen(filename) + 9);
      strcpy(index_name, filename);
      strcat(index_name, ".summary");
      if((file = fopen(index_name, "r")) == NULL){ free(index_name); return NULL; }
      cns = (CNSCache*)calloc(1, sizeof(CNSCache));
      fscanf(file, "N_REF: %d\n", &cns->n_ref);
      cns->ref_name = (char**)malloc(sizeof(char*) * cns->n_ref);
      cns->ref_lengths = (int64_t*)malloc(sizeof(int64_t) * cns->n_ref);
      cns->ref_offsets = (int64_t*)malloc(sizeof(int64_t) * cns->n_ref);
      cns->ref_states = (int*)calloc(cns->n_ref, sizeof(int));
      cns->capacity = 512;
      cns->seqs = (cns_t*)malloc(sizeof(cns_t) * cns->capacity);
      cns->ref_id = -1;
      for(i=0;i<cns->n_ref;i++){
            cns->ref_name[i] = (char*)malloc(120);
            fscanf(file, "REF_NAME:%s\n", cns->ref_name[i]);
            fscanf(file, "REF_LENGTH:%ld\n", cns->ref_lengths + i);
            fscanf(file, "REF_OFFSET:%ld\n", cns->ref_offsets + i);
      }
      fclose(file);
      if((cns->stream = zropen(filename)) == NULL){ close_cns_cache(cns); return NULL;}
      return cns;
}

static void synchronize_ref(CNSCache *cns, int n_ref, char **ref_name)
{
      hashci_t *h;
      int i, val;
      cns->n_mm_ref = n_ref;
      cns->mm_map = (int*)calloc(cns->n_mm_ref, sizeof(int));
      cns->cns_map = (int*)calloc(cns->n_ref, sizeof(int));
      h = hci_init();
      for (i = 0; i != cns->n_ref; ++i) hci_put(h, cns->ref_name[i], i);
      for (i = 0; i != cns->n_mm_ref; ++i)
            cns->mm_map[i] = hci_get(h, ref_name[i], &val)? val : -1;
      hci_destroy(h);
      h = hci_init();
      for (i = 0; i != cns->n_mm_ref; ++i) hci_put(h, ref_name[i], i);
      for (i = 0; i != cns->n_ref; ++i)
            cns->cns_map[i] = hci_get(h, cns->ref_name[i], &val)? val : -1;
      hci_destroy(h);
}

void justify_cns_cache(CNSCache *cns, char **ref_name, int n_ref){
      cns->offset = 0;
      cns->size = 0;
      cns->start = 0;
      cns->end = 0;
      cns->ref_id = 0;
      cns->last_ref_pos = 0;
      synchronize_ref(cns, n_ref, ref_name);
}

int cns_cache_put(CNSCache *cns, int ref_id, int64_t start, int64_t end){
      int64_t offset;
      int64_t size;
      if(cns->stream == NULL) return -2;
      if(ref_id < 0 || ref_id >= cns->n_ref) return -2;
      cns->src_type = CNS_SRC_FILE;
      if(start < 0) start = 0;
      if(start >= cns->ref_lengths[ref_id]) start = cns->ref_lengths[ref_id] - 1;
      if(end < 0) end = 0;
      if(end >= cns->ref_lengths[ref_id]) end = cns->ref_lengths[ref_id] - 1;
      if (cns->ref_id < 0 || ref_id != cns->mm_map[cns->ref_id]){
            cns->ref_id = cns->cns_map[ref_id];
            goto RESET;
      }
      if(start < cns->start){
            if(end < cns->start || end > cns->end){
                  goto RESET;
            } else {
                  goto APPEND_LEFT;
            }
      } else if(start >= cns->end){
            goto RESET;
      } else {
            if(end > cns->end){
                  goto APPEND_RIGHT;
            } else {
                  return 0;
            }
      }

      RESET:
      offset = cns->ref_offsets[ref_id] + start * sizeof(cns_t);
      if(zrseek(cns->stream, offset) < 0){
            fprintf(stderr, "Read Error: at %s:%d\n", __FILE__, __LINE__);
            return -1;
      }
      cns->offset = 0;
      cns->size   = end - start + 1;
      if(cns->capacity < cns->size){
            cns->capacity = cns->size;
            cns->seqs = (cns_t*)realloc(cns->seqs, sizeof(cns_t) * cns->capacity);
      }
      offset = zrread(cns->seqs, sizeof(cns_t) * cns->size, cns->stream);
      if(offset != cns->size * sizeof(cns_t)){
            fprintf(stderr, "Read Error: at %s:%d\n", __FILE__, __LINE__);
            return -1;
      }
      cns->last_ref_pos = zrtell(cns->stream);
      cns->start = start;
      cns->end   = end;
      return cns->size;

      APPEND_LEFT:
      start -= 1024 * 5;
      if(start < 0) start = 0;
      offset = cns->ref_offsets[ref_id] + start * sizeof(cns_t);
      if(zrseek(cns->stream, offset) < 0){
            fprintf(stderr, "Read Error: at %s:%d\n", __FILE__, __LINE__);
            return -1;
      }
      size = cns->start - start;
      if(cns->offset < size){
            if(cns->size + size > cns->capacity){
                  if(end - start + 1 > cns->capacity){
                        size = end - start + 1;
                        cns->capacity = size;
                        cns->offset = cns->capacity;
                        cns->size = 0;
                        cns->seqs = (cns_t*)realloc(cns->seqs, sizeof(cns_t) * cns->capacity);
                  } else {
                        cns->size = cns->capacity - size;
                        memmove(cns->seqs + size, cns->seqs + cns->offset, sizeof(cns_t) * cns->size);
                        cns->offset = size;
                  }
            } else {
                  memmove(cns->seqs + cns->capacity - cns->size, cns->seqs + cns->offset, sizeof(cns_t) * (cns->size));
                  cns->offset = cns->capacity - cns->size;
            }
      }
      offset = zrread(cns->seqs + cns->offset - size, sizeof(cns_t) * size, cns->stream);
      if(offset != size * sizeof(cns_t)){
            fprintf(stderr, "Read Error: at %s:%d\n", __FILE__, __LINE__);
            return -1;
      }
      cns->last_ref_pos = zrtell(cns->stream);
      cns->size += size;
      cns->offset -= size;
      cns->start = start;
      cns->end   = start + cns->size - 1;
      return end - start + 1;

      APPEND_RIGHT:
      end += 1024;
      if(end >= cns->ref_lengths[ref_id]) end = cns->ref_lengths[ref_id] - 1;
      size = end - cns->end;
      if(cns->offset + cns->size + size > cns->capacity){
            if(cns->size + size <= cns->capacity){
                  memmove(cns->seqs, cns->seqs + cns->offset, cns->size * sizeof(cns_t));
                  cns->offset = 0;
            } else {
                  if(end - start + 1 > cns->capacity){
                        size = end - start + 1;
                        cns->capacity = size;
                        cns->offset = 0;
                        cns->size = 0;
                        cns->seqs = (cns_t*)realloc(cns->seqs, sizeof(cns_t) * cns->capacity);
                  } else {
                        memmove(cns->seqs, cns->seqs + cns->offset + cns->size - (cns->capacity - size),
                              sizeof(cns_t) * (cns->capacity - size));
                        cns->offset = 0;
                        cns->size = cns->capacity - size;
                  }
            }
      }
      offset = cns->ref_offsets[ref_id] + (end - size + 1) * sizeof(cns_t);
      if(zrseek(cns->stream, offset) < 0){
            fprintf(stderr, "Read Error: at %s:%d\n", __FILE__, __LINE__);
            return -1;
      }
      offset = zrread(cns->seqs + cns->offset + cns->size, sizeof(cns_t) * size, cns->stream);
      if(offset != size * sizeof(cns_t)){
            fprintf(stderr, "Read Error: at %s:%d\n", __FILE__, __LINE__);
            return -1;
      }
      cns->last_ref_pos = zrtell(cns->stream);
      cns->size += size;
      cns->start = end - cns->size + 1;
      cns->end   = end;
      return end - start + 1;
}

int cns_cache_guess(CNSCache *cns, ReadCache *cache){
      int i, n, l;
      int *guess_seqs;
      int64_t r;
      int trans_table[] = {1, 2, 4, 8, 15};
      cns->src_type = CNS_SRC_GUESS;
      if(cns->ref_id == cache->ref_id && cache->start >= cns->start && cache->end <= cns->end){
            return 0;
      }
      if(cns->capacity < cache->end - cache->start + 1){
            cns->capacity = cache->end - cache->start + 1;
            cns->seqs = (cns_t*)realloc(cns->seqs, sizeof(cns_t) * cns->capacity);
      }
      cns->offset = 0;
      cns->size   = cache->end - cache->start + 1;
      cns->start  = cache->start;
      cns->end    = cache->end;
      cns->ref_id = cache->ref_id;
      guess_seqs = (int*)malloc(sizeof(int) * cns->size * 4);
      if(guess_seqs == NULL){
            fprintf(stderr, "Maqview ** Out of memory %ld M **\n", sizeof(int) * cns->size * 4 / (1024 * 1024));
            fprintf(stderr, "Maqview ** cache %ld - %ld **\n", cache->start, cache->end);
            return -1;
      }
      memset(guess_seqs, 0, sizeof(int) * cns->size * 4);
      for(i=0;i<cache->size;i++){
            r = read_pos(cache->reads[cache->offset + i].read.pos);
            for(l=0;l<cache->reads[cache->offset + i].read.size;l++){
                  if(r + l > cns->end) break;
                  guess_seqs[(r + l - cns->start) * 4 + (cache->reads[cache->offset + i].read.seq[l]>>6&3)] ++;
            }
      }
      memset(cns->seqs, 0, sizeof(cns_t) * cns->size);
      for(i=0;i<cns->size;i++){
            r = 4;
            n = 0;
            for(l=0;l<4;l++){
                  if(guess_seqs[i * 4 + l] > n){
                        r = l;
                        n = guess_seqs[i * 4 + l];
                  }
            }
            cns_set_ref(cns->seqs[i], 0);
            cns_set_cns(cns->seqs[i], trans_table[r]);
      }
      free(guess_seqs);
      return cns->size;
}

Generated by  Doxygen 1.6.0   Back to index