Logo Search packages:      
Sourcecode: maqview version File versions  Download package

maqmap_index_main.c

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
#include <sys/types.h>
#include <ctype.h>
#include "zrio.h"
#include "maqmap_index.h"
#include "caches.h"
#include "stdhashc.h"

static gzFile global_fp;

static int usage()
{
      fprintf(stderr, "\n");
      fprintf(stderr, "Usage:   maqindex <-i|-v|-b> [-l level] [-c cns] <in.map> [chr[:begin[-end]] [...]]\n\n");
      fprintf(stderr, "Options: -l INT     level for indexing [1024]\n");
      fprintf(stderr, "         -c STR     .cns file for this .map file\n");
      fprintf(stderr, "         -i         index the map file\n");
      fprintf(stderr, "         -v         display the alignment in the mapview format\n");
      fprintf(stderr, "         -b         dump in the .map format\n\n");
      return 1;
}

hashci_t *mv_mk_hashci(int n, char **keys)
{
      hashci_t *h;
      int i;
      h = hci_init();
      for (i = 0; i != n; ++i)
            hci_put(h, keys[i], i);
      return h;
}

void mv_get_region(const hashci_t *h, const char *str, int *ref_id, int *begin, int *end)
{
      char *s, *p;
      int i, l, k;
      l = strlen(str);
      p = s = (char*)malloc(l+1);
      /* squeeze out "," */
      for (i = k = 0; i != l; ++i)
            if (str[i] != ',' && !isspace(str[i])) s[k++] = str[i];
      s[k] = 0;
      for (i = 0; i != k; ++i) if (s[i] == ':') break;
      s[i] = 0;
      assert(hci_get(h, p, ref_id)); /* get the ref_id */
      if (i == k) { /* dump the whole sequence */
            *begin = 0; *end = 0x7fffffff;
            return;
      }
      for (p = s + i + 1; i != k; ++i) if (s[i] == '-') break;
      *begin = atoi(p);
      if (i < k) {
            p = s + i + 1;
            *end = atoi(p);
      } else {
            *end = 0x7fffffff;
      }
      if (*begin > 0) --*begin;
      if (*end > 0) --*end; /* change to 0 based coordinate */
      assert(*begin <= *end);
      free(s);
}

static int dump_map(void *obj, maplet *m1)
{
      return gzwrite(global_fp, m1, sizeof(maplet));
}

int print_map(void *obj, maplet *m1){
      int j;
      maqmap_t *m;
      FILE *fpout;
      m = (maqmap_t*)obj;
      fpout = stdout;
      fprintf(fpout, "%s\t%s\t%d\t%c\t%d\t%u\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t",
                  m1->name, m->ref_name[m1->seqid], (m1->pos>>1) + 1,
                  (m1->pos&1)? '-' : '+', m1->dist, m1->flag, m1->map_qual, (signed char)m1->seq[MAX_READLEN-1],
                  m1->alt_qual, m1->info1&0xf, m1->info2, m1->c[0], m1->c[1], m1->size);
      for (j = 0; j != m1->size; ++j) {
            if (m1->seq[j] == 0) fputc('n', fpout);
            else if ((m1->seq[j]&0x3f) < 27) fputc("acgt"[m1->seq[j]>>6&3], fpout);
            else fputc("ACGT"[m1->seq[j]>>6&3], fpout);
      }
      fputc('\t', fpout);
      for (j = 0; j != m1->size; ++j)
            fputc((m1->seq[j]&0x3f) + 33, fpout);
      fputc('\n', fpout);
      return 1;
}

int main(int argc, char *argv[])
{
      int c, i, level, task = 0;
      int64_t j;
      char *cns_name = 0;
      CNSCache *cns;
      MapIndex *mi;
      level = 1024;
      cns_name = NULL;
      while ((c = getopt(argc, argv, "l:c:ivb")) >= 0) {
            switch (c) {
            case 'l': level = atoi(optarg); break;
            case 'c': cns_name = strdup(optarg); break;
            case 'i': task = 'i'; break;
            case 'v': task = 'v'; break;
            case 'b': task = 'b'; break;
            }
      }
      if(cns_name){
            if(task != 'i' && task != 'v'){
                  fprintf(stderr, "** One of '-i' and '-v' must be specified for cns.\n");
                  return 1;
            }
      } else {
            if (optind == argc) return usage();
            if (task != 'i' && task != 'v' && task != 'b') {
                  fprintf(stderr, "** One of '-i', '-v' and '-b' must be specified.\n");
                  return 1;
            } else if (task == 'v' || task == 'b') {
                  if (optind+1 == argc) {
                        fprintf(stderr, "** A region like 'chr' or 'chr:begin-end' must be specified with '-v' or '-b'.\n");
                        return 1;
                  }
            }
      }
      if (level < 2) level = 2;
      if (task == 'i') { /* index */
            if(optind < argc){
                  fprintf(stderr, "-- Indexing the alignment file '%s' ... ", argv[optind]);
                  assert(mi = make_map_index(argv[optind], level));
                  close_map_index(mi);
                  fprintf(stderr, "DONE!\n");
            }
            if (cns_name) {
                  fprintf(stderr, "-- Indexing the consensus file '%s' ... ", cns_name);
                  assert(cns = make_cns_index(cns_name, level));
                  fprintf(stderr, "DONE!\n");
                  close_cns_cache(cns);
                  free(cns_name);
            }
      } else { /* use */
            int ref_id, begin, end, (*func)(void*, maplet*);
            hashci_t *h;
            if(cns_name){
                  if(task != 'v') return usage();
                  CNSCache *cns;
                  cns_t *seq;
                  cns = open_cns_cache(cns_name);
                  if(cns == NULL){
                        fprintf(stderr, " -- Cannot open %s in %s -- %s:%d --\n", cns_name, __FUNCTION__, __FILE__, __LINE__);
                  }
                  justify_cns_cache(cns, cns->ref_name, cns->n_ref);
                  h = mv_mk_hashci(cns->n_ref, cns->ref_name);
                  for (i = optind; i != argc; ++i) {
                        mv_get_region(h, argv[i], &ref_id, &begin, &end);
                        if(cns_cache_put(cns, ref_id, (int64_t)begin, (int64_t)end) < 0){
                              fprintf(stderr, " -- Error in %s -- %s:%d --\n", __FUNCTION__, __FILE__, __LINE__);
                              break;
                        }
                        for(j=0;j<cns->size;j++){
                              if(j + cns->start < begin || j + cns->start > end) continue;
                              seq = cns->seqs + j + cns->offset;
                              printf("%s\t%ld\t%c\t%c\t%d\t%d\t%.2f\t%d\t%d\n", cns->ref_name[ref_id], j+cns->start+1, 
                                    nst_nt16_rev_table[seq->ref_base],
                                    nst_nt16_rev_table[seq->base],
                                    seq->qual, seq->depth, seq->avg01/16.0, seq->qMax, seq->qNei<<1);
                        }
                  }
                  hci_destroy(h);
                  close_cns_cache(cns);
                  return 0;
            }
            if(optind >= argc) return usage();
            func = (task == 'v')? print_map : dump_map;
            mi = load_map_index(argv[optind], 1);
            if (mi == 0) {
                  fprintf(stderr, "** The index for '%s' is broken. Please rebuild the index.\n", argv[optind]);
                  return 1;
            }
            if (task == 'b') {
                  global_fp = gzdopen(fileno(stdout), "w");
                  maqmap_write_header(global_fp, mi->mm);
            }
            h = mv_mk_hashci(mi->mm->n_ref, mi->mm->ref_name);
            for (i = optind+1; i != argc; ++i) {
                  mv_get_region(h, argv[i], &ref_id, &begin, &end);
                  iter_map_index(mi, ref_id, begin, end, func, mi->mm);
            }
            hci_destroy(h);
            if (task == 'b') gzclose(global_fp);
            /* BAD: close_map_index(mi); */
      }
      return 0;
}

Generated by  Doxygen 1.6.0   Back to index