diff --git a/hfile.c b/hfile.c
index b0c5eba30..48a124469 100644
--- a/hfile.c
+++ b/hfile.c
@@ -83,15 +83,15 @@ then there is a non-empty read buffer, and if begin == end then both buffers
 are empty.  In all cases, the stream's file position indicator corresponds
 to the position pointed to by begin.
 
-The above is the normal scenario of a mobile window.  For in-memory streams,
-a fixed (immobile) buffer can be used as the full contents without any separate
-backend behind it.  These always have at_eof set, offset set to 0, need no
-read() method, and should just return EINVAL for seek():
+The above is the normal scenario of a mobile window.  For in-memory
+streams (e.g. via hfile_init_fixed()) the buffer can be used as the full
+contents without any separate backend behind it.  These always have at_eof
+set, offset set to 0, need no read() method, and should just return EINVAL
+for seek():
 
     abcdefghijkLMNOPQRSTUVWXYZ------
     ^buffer    ^begin         ^end  ^limit
-
-Use hfile_init_fixed() to create one of these.  */
+*/
 
 hFILE *hfile_init(size_t struct_size, const char *mode, size_t capacity)
 {
@@ -138,6 +138,8 @@ hFILE *hfile_init_fixed(size_t struct_size, const char *mode,
     return fp;
 }
 
+static const struct hFILE_backend mem_backend;
+
 void hfile_destroy(hFILE *fp)
 {
     int save = errno;
@@ -404,7 +406,7 @@ off_t hseek(hFILE *fp, off_t offset, int whence)
 {
     off_t curpos, pos;
 
-    if (writebuffer_is_nonempty(fp)) {
+    if (writebuffer_is_nonempty(fp) && fp->mobile) {
         int ret = flush_buffer(fp);
         if (ret < 0) return ret;
     }
@@ -615,6 +617,60 @@ static hFILE *hopen_fd(const char *filename, const char *mode)
     return NULL;
 }
 
+// Loads the entire contents of fp into memory and returns a read-only,
+// immobile hFILE over that copy, or NULL on failure.  fp is the already
+// opened file.  We always close this input fp, irrespective of whether we
+// error or whether we return a new immobile hfile.
+static hFILE *hpreload(hFILE *fp) {
+    hFILE *mem_fp;
+    char *buf = NULL;
+    off_t buf_sz = 0, buf_a = 0, buf_inc = 8192, len;
+
+    for (;;) {
+        // Grow the buffer whenever fewer than 5000 spare bytes remain
+        if (buf_a - buf_sz < 5000) {
+            buf_a += buf_inc;
+            char *t = realloc(buf, buf_a);
+            if (!t) goto err;
+            buf = t;
+            if (buf_inc < 1000000) buf_inc *= 1.3;
+        }
+        len = hread(fp, buf+buf_sz, buf_a-buf_sz);
+        if (len > 0)
+            buf_sz += len;
+        else
+            break;
+    }
+
+    if (len < 0) goto err;
+    mem_fp = hfile_init_fixed(sizeof(hFILE), "r", buf, buf_sz, buf_a);
+    if (!mem_fp) goto err;
+    mem_fp->backend = &mem_backend;
+
+    if (hclose(fp) < 0) {
+        // buf now belongs to mem_fp and hclose() has already been called on
+        // fp, so don't go via err: which would release both a second time.
+        hclose_abruptly(mem_fp);
+        return NULL;
+    }
+    return mem_fp;
+
+ err:
+    free(buf);
+    hclose_abruptly(fp);
+    return NULL;
+}
+
+static int is_preload_url_remote(const char *url){
+    return hisremote(url + 8); // len("preload:") = 8
+}
+
+static hFILE *hopen_preload(const char *url, const char *mode){
+    hFILE* fp = hopen(url + 8, mode);
+    if (fp == NULL) return NULL;
+    return hpreload(fp);
+}
+
 hFILE *hdopen(int fd, const char *mode)
 {
     hFILE_fd *fp = (hFILE_fd*) hfile_init(sizeof (hFILE_fd), mode, blksize(fd));
@@ -711,6 +767,18 @@ static int cmp_prefix(const char *key, const char *s)
     return 0;
 }
 
+// Wraps buffer (buf_filled valid bytes, buf_size capacity) in a fixed-buffer
+// hFILE using mem_backend.  Returns NULL if hfile_init_fixed() fails.
+static hFILE *create_hfile_mem(char* buffer, const char* mode, size_t buf_filled, size_t buf_size)
+{
+    hFILE_mem *fp = (hFILE_mem *) hfile_init_fixed(sizeof(hFILE_mem), mode, buffer, buf_filled, buf_size);
+    if (fp == NULL)
+        return NULL;
+
+    fp->base.backend = &mem_backend;
+    return &fp->base;
+}
+
 static hFILE *hopen_mem(const char *url, const char *mode)
 {
     size_t length, size;
@@ -734,13 +802,67 @@ static hFILE *hopen_mem(const char *url, const char *mode)
         if (buffer == NULL) return NULL;
         hts_decode_percent(buffer, &length, data);
     }
+    hFILE* hf;
 
-    hFILE_mem *fp = (hFILE_mem *)
-        hfile_init_fixed(sizeof (hFILE_mem), mode, buffer, length, size);
-    if (fp == NULL) { free(buffer); return NULL; }
+    if(!(hf = create_hfile_mem(buffer, mode, length, size))){
+        free(buffer);
+        return NULL;
+    }
 
-    fp->base.backend = &mem_backend;
-    return &fp->base;
+    return hf;
+}
+
+// vopen() handler for "mem:".  The variable arguments are a char *buffer
+// followed by its size_t length.  The buffer is freed if opening fails.
+hFILE *hopenv_mem(const char *filename, const char *mode, va_list args)
+{
+    // The caller owns args: hopen() pairs its va_start() with va_end(),
+    // so va_end() must not be invoked on it here as well.
+    char* buffer = va_arg(args, char*);
+    size_t sz = va_arg(args, size_t);
+
+    hFILE* hf;
+
+    if(!(hf = create_hfile_mem(buffer, mode, sz, sz))){
+        free(buffer);
+        return NULL;
+    }
+
+    return hf;
+}
+
+// Returns the buffer of a mem hFILE and, if length is non-NULL, stores
+// its size (buffer start to limit) in *length.  Fails with EINVAL for
+// any other kind of hFILE.
+char *hfile_mem_get_buffer(hFILE *file, size_t *length) {
+    if (file->backend != &mem_backend) {
+        errno = EINVAL;
+        return NULL;
+    }
+
+    if (length)
+        *length = file->limit - file->buffer;
+
+    return file->buffer;
+}
+
+// As hfile_mem_get_buffer(), but detaches the buffer from the hFILE so
+// that the caller becomes responsible for freeing it.
+char *hfile_mem_steal_buffer(hFILE *file, size_t *length) {
+    char *buf = hfile_mem_get_buffer(file, length);
+    if (buf)
+        file->buffer = NULL;
+    return buf;
+}
+
+int hfile_plugin_init_mem(struct hFILE_plugin *self)
+{
+    // mem files are declared remote so they work with a tabix index
+    static const struct hFILE_scheme_handler handler =
+            {NULL, hfile_always_remote, "mem", 2000 + 50, hopenv_mem};
+    self->name = "mem";
+    hfile_add_scheme_handler("mem", &handler);
+    return 0;
 }
 
 
@@ -825,14 +947,17 @@ static void load_hfile_plugins()
 {
     static const struct hFILE_scheme_handler
         data = { hopen_mem, hfile_always_local, "built-in", 80 },
-        file = { hopen_fd_fileuri, hfile_always_local, "built-in", 80 };
+        file = { hopen_fd_fileuri, hfile_always_local, "built-in", 80 },
+        preload = { hopen_preload, is_preload_url_remote, "built-in", 80 };
 
     schemes = kh_init(scheme_string);
     if (schemes == NULL) abort();
 
     hfile_add_scheme_handler("data", &data);
     hfile_add_scheme_handler("file", &file);
+    hfile_add_scheme_handler("preload", &preload);
     init_add_plugin(NULL, hfile_plugin_init_net, "knetfile");
+    init_add_plugin(NULL, hfile_plugin_init_mem, "mem");
 
 #ifdef ENABLE_PLUGINS
     struct hts_path_itr path;
@@ -908,21 +1033,27 @@ static const struct hFILE_scheme_handler *find_scheme_handler(const char *s)
 
 hFILE *hopen(const char *fname, const char *mode, ...)
 {
+    hFILE *fp = NULL;
+
     const struct hFILE_scheme_handler *handler = find_scheme_handler(fname);
     if (handler) {
-        if (strchr(mode, ':') == NULL) return handler->open(fname, mode);
+        if (strchr(mode, ':') == NULL) {
+            // Some handlers (e.g. "mem") supply only vopen(), not open()
+            if (handler->open == NULL) { errno = ENOTSUP; return NULL; }
+            fp = handler->open(fname, mode);
+        }
         else if (handler->priority >= 2000 && handler->vopen) {
-            hFILE *fp;
             va_list arg;
             va_start(arg, mode);
             fp = handler->vopen(fname, mode, arg);
             va_end(arg);
-            return fp;
         }
         else { errno = ENOTSUP; return NULL; }
     }
-    else if (strcmp(fname, "-") == 0) return hopen_fd_stdinout(mode);
-    else return hopen_fd(fname, mode);
+    else if (strcmp(fname, "-") == 0) fp = hopen_fd_stdinout(mode);
+    else fp = hopen_fd(fname, mode);
+
+    return fp;
 }
 
 int hfile_always_local (const char *fname) { return 0; }
diff --git a/htslib/hfile.h b/htslib/hfile.h
index fa8971842..5b53b0386 100644
--- a/htslib/hfile.h
+++ b/htslib/hfile.h
@@ -202,7 +202,7 @@ hread(hFILE *fp, void *buffer, size_t nbytes)
     if (n > nbytes) n = nbytes;
     memcpy(buffer, fp->begin, n);
     fp->begin += n;
-    return (n == nbytes)? (ssize_t) n : hread2(fp, buffer, nbytes, n);
+    return (n == nbytes || !fp->mobile)? (ssize_t) n : hread2(fp, buffer, nbytes, n);
 }
 
 /// Write a character to the stream
@@ -239,7 +239,17 @@ static inline ssize_t HTS_RESULT_USED
 hwrite(hFILE *fp, const void *buffer, size_t nbytes)
 {
     extern ssize_t hwrite2(hFILE *, const void *, size_t, size_t);
-
+    extern int hfile_set_blksize(hFILE *fp, size_t bufsiz);
+
+    if(!fp->mobile){
+        // Immobile buffers have no backend to flush to, so grow in place
+        if ((size_t) (fp->limit - fp->begin) < nbytes){
+            if (hfile_set_blksize(fp, fp->limit - fp->buffer + nbytes) < 0)
+                return -1;
+            fp->end = fp->limit;
+        }
+    }
+
     size_t n = fp->limit - fp->begin;
     if (n > nbytes) n = nbytes;
     memcpy(fp->begin, buffer, n);
@@ -254,6 +264,28 @@ This includes low-level flushing such as via `fdatasync(2)`.
 */
 int hflush(hFILE *fp) HTS_RESULT_USED;
 
+/// For hfile_mem: get the internal buffer and its size from a hfile
+/** @param file    The hFILE to query
+    @param length  If non-NULL, filled in with the size of the buffer
+    @return  buffer if successful, or NULL if an error occurred
+
+The buffer returned should not be freed as this will happen when the
+hFILE is closed.
+*/
+char *hfile_mem_get_buffer(hFILE *file, size_t *length);
+
+/// For hfile_mem: get the internal buffer and its size from a hfile.
+/** @param file    The hFILE to query
+    @param length  If non-NULL, filled in with the size of the buffer
+    @return  buffer if successful, or NULL if an error occurred
+
+This is similar to hfile_mem_get_buffer except that ownership of the
+buffer is granted to the caller, who now has responsibility for freeing
+it.  From this point onwards, the hFILE should not be used for any
+purpose other than closing.
+*/
+char *hfile_mem_steal_buffer(hFILE *file, size_t *length);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/test/hfile.c b/test/hfile.c
index 577b8171b..7e09ba03c 100644
--- a/test/hfile.c
+++ b/test/hfile.c
@@ -202,6 +202,49 @@ int main(void)
     if ((c = hgetc(fin)) != EOF) fail("chars: hgetc (EOF) returned %d", c);
     if (hclose(fin) != 0) fail("hclose(test/hfile_chars.tmp) for reading");
 
+    fin = hopen("preload:test/hfile_chars.tmp", "r");
+    if (fin == NULL) fail("preloading \"test/hfile_chars.tmp\" for reading");
+    for (i = 0; i < 256; i++)
+        if ((c = hgetc(fin)) != i)
+            fail("preloading chars: hgetc (%d = 0x%x) returned %d = 0x%x", i, i, c, c);
+    if ((c = hgetc(fin)) != EOF) fail("preloading chars: hgetc (EOF) returned %d", c);
+    if (hclose(fin) != 0) fail("preloading hclose(test/hfile_chars.tmp) for reading");
+
+    char* test_string = strdup("Test string");
+    fin = hopen("mem:", "r:", test_string, (size_t) 12);
+    if (fin == NULL) fail("hopen(\"mem:\", \"r:\", ...)");
+    if (hread(fin, buffer, 12) != 12)
+        fail("hopen('mem:', 'r') failed read");
+    if(strcmp(buffer, test_string) != 0)
+        fail("hopen('mem:', 'r') missread '%s' != '%s'", buffer, test_string);
+    char* internal_buf;
+    size_t interval_buf_len;
+    if((internal_buf = hfile_mem_get_buffer(fin, &interval_buf_len)) == NULL){
+        fail("hopen('mem:', 'r') failed to get internal buffer");
+    }
+    if (interval_buf_len != 12)
+        fail("hopen('mem:', 'r') internal buffer length %zu != 12", interval_buf_len);
+    if (hclose(fin) != 0) fail("hclose mem for reading");
+
+    test_string = strdup("Test string");
+    fin = hopen("mem:", "wr:", test_string, (size_t) 12);
+    if (fin == NULL) fail("hopen(\"mem:\", \"wr:\", ...)");
+    if (hseek(fin, -1, SEEK_END) < 0)
+        fail("hopen('mem:', 'wr') failed seek");
+    if (hwrite(fin, " extra", 7) != 7)
+        fail("hopen('mem:', 'wr') failed write");
+    if (hseek(fin, 0, SEEK_SET) < 0)
+        fail("hopen('mem:', 'wr') failed seek");
+    if (hread(fin, buffer, 18) != 18)
+        fail("hopen('mem:', 'wr') failed read");
+    if (strcmp(buffer, "Test string extra") != 0)
+        fail("hopen('mem:', 'wr') misswrote '%s' != '%s'", buffer, "Test string extra");
+    if((internal_buf = hfile_mem_steal_buffer(fin, &interval_buf_len)) == NULL){
+        fail("hopen('mem:', 'wr') failed to get internal buffer");
+    }
+    free(internal_buf);
+    if (hclose(fin) != 0) fail("hclose mem for writing");
+
     fin = hopen("data:,hello, world!%0A", "r");
     if (fin == NULL) fail("hopen(\"data:...\")");
     n = hread(fin, buffer, 300);