Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 17 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
CC = gcc
AR = ar
RANLIB = ranlib

SOURCE_DIR = .
# Default libraries to link if configure is not used
htslib_default_libs = -lz -lm -lbz2 -llzma -lcurl

Expand All @@ -35,10 +35,20 @@ CPPFLAGS =
# TODO: probably update cram code to make it compile cleanly with -Wc++-compat
# For testing strict C99 support add -std=c99 -D_XOPEN_SOURCE=600
#CFLAGS = -g -Wall -O2 -pedantic -std=c99 -D_XOPEN_SOURCE=600
CFLAGS = -g -Wall -O2 -fvisibility=hidden
ifdef DEBUG
CFLAGS = -DDEBUG -g3 -gdwarf-3
LDFLAGS = -g3 -gdwarf-3
else
CFLAGS = -O3
LDFLAGS =
endif
ifdef PROFILE
CFLAGS += -pg
endif
CFLAGS += -Wall -fPIC
EXTRA_CFLAGS_PIC = -fpic
TARGET_CFLAGS =
LDFLAGS = -fvisibility=hidden
LDFLAGS =
VERSION_SCRIPT_LDFLAGS = -Wl,-version-script,$(srcprefix)htslib.map
LIBS = $(htslib_default_libs)

Expand Down Expand Up @@ -123,7 +133,7 @@ htscodecs.mk:
$(srcdir)/hts_probe_cc.sh '$(CC)' '$(CFLAGS) $(CPPFLAGS)' '$(LDFLAGS)' >> $@

srcdir = .
srcprefix =
srcprefix = $(SOURCE_DIR)/
HTSPREFIX =

# Flags for SIMD code
Expand All @@ -138,7 +148,7 @@ HTS_BUILD_SSSE3 =
HTS_BUILD_POPCNT =
HTS_BUILD_SSE4_1 =

include htslib_vars.mk
include $(SOURCE_DIR)/htslib_vars.mk
include htscodecs.mk

# If not using GNU make, you need to copy the version number from version.sh
Expand Down Expand Up @@ -189,10 +199,10 @@ config_vars.h:
.SUFFIXES: .bundle .c .cygdll .dll .o .pico .so

.c.o:
$(CC) $(CFLAGS) $(TARGET_CFLAGS) $(ALL_CPPFLAGS) -c -o $@ $<
$(CC) $(CFLAGS) -I$(SOURCE_DIR) $(TARGET_CFLAGS) $(ALL_CPPFLAGS) -c -o $@ $<

.c.pico:
$(CC) $(CFLAGS) $(TARGET_CFLAGS) $(ALL_CPPFLAGS) $(EXTRA_CFLAGS_PIC) -c -o $@ $<
$(CC) $(CFLAGS) -I$(SOURCE_DIR) $(TARGET_CFLAGS) $(ALL_CPPFLAGS) $(EXTRA_CFLAGS_PIC) -c -o $@ $<


LIBHTS_OBJS = \
Expand Down
58 changes: 58 additions & 0 deletions faidx.c
Original file line number Diff line number Diff line change
Expand Up @@ -914,6 +914,64 @@ int fai_adjust_region(const faidx_t *fai, int tid,
return ((orig_beg != *beg ? 1 : 0) |
(orig_end != *end && orig_end < HTS_POS_MAX ? 2 : 0));
}
static void fai_retrieve_into_buffer(const faidx_t *fai, const faidx1_t *val,
const uint64_t offset, const hts_pos_t beg, const hts_pos_t end,
char* s, hts_pos_t *len) {
size_t l;
int c = 0;
int ret;

if ((uint64_t) end - (uint64_t) beg >= SIZE_MAX - 2) {
hts_log_error("Range %"PRId64"..%"PRId64" too big", beg, end);
*len = -1;
return;
}

if (val->line_blen <= 0) {
hts_log_error("Invalid line length in index: %d", val->line_blen);
*len = -1;
return;
}

ret = bgzf_useek(fai->bgzf,
offset
+ beg / val->line_blen * val->line_len
+ beg % val->line_blen, SEEK_SET);

if (ret < 0) {
*len = -1;
hts_log_error("Failed to retrieve block. (Seeking in a compressed, .gzi unindexed, file?)");
return;
}

l = 0;

while ( l < end - beg && (c=bgzf_getc(fai->bgzf))>=0 )
if (isgraph(c)) s[l++] = c;
if (c < 0) {
hts_log_error("Failed to retrieve block: %s",
c == -1 ? "unexpected end of file" : "error reading file");
*len = -1;
return;
}

s[l] = '\0';
*len = l;
}

void faidx_fetch_seq_into_buffer(const faidx_t *fai,
const char *c_name, hts_pos_t p_beg_i, hts_pos_t p_end_i, char* s, hts_pos_t *len)
{
faidx1_t val;

// Adjust position
if (faidx_adjust_position(fai, 1,&val, c_name, &p_beg_i, &p_end_i, len)) {
*len = 0;
return;
}

fai_retrieve_into_buffer(fai, &val, val.seq_offset, p_beg_i, p_end_i + 1, s, len);
}

char *faidx_fetch_seq64(const faidx_t *fai, const char *c_name, hts_pos_t p_beg_i, hts_pos_t p_end_i, hts_pos_t *len)
{
Expand Down
2 changes: 1 addition & 1 deletion hfile.c
Original file line number Diff line number Diff line change
Expand Up @@ -1131,7 +1131,7 @@ static hFILE *hopen_unknown_scheme(const char *fname, const char *mode)
}

/* Returns the appropriate handler, or NULL if the string isn't an URL. */
static const struct hFILE_scheme_handler *find_scheme_handler(const char *s)
const struct hFILE_scheme_handler *find_scheme_handler(const char *s)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you see if there is a new api to use the htslib plugin mechanism for schemes like azb://.s3://, so we don't have to expose this function?

Copy link
Author

@eneskuluk eneskuluk Sep 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are new functions to list plugins and schemes. Explained here : samtools#1170 and samtools#1222. Not reviewed in depth yet.

{
static const struct hFILE_scheme_handler unknown_scheme =
{ hopen_unknown_scheme, hfile_always_local, "built-in", 0 };
Expand Down
2 changes: 1 addition & 1 deletion htscodecs
4 changes: 4 additions & 0 deletions htslib/faidx.h
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,10 @@ by end users by calling `free()` on it.
HTSLIB_EXPORT
char *faidx_fetch_seq(const faidx_t *fai, const char *c_name, int p_beg_i, int p_end_i, int *len);

void faidx_fetch_seq_into_buffer(const faidx_t *fai,
const char *c_name, hts_pos_t p_beg_i, hts_pos_t p_end_i,
char* s, hts_pos_t *len);

/// Fetch the sequence in a region
/** @param fai Pointer to the faidx_t struct
@param c_name Region name
Expand Down
1 change: 1 addition & 0 deletions htslib/synced_bcf_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ typedef struct bcf_sr_t
{
htsFile *file;
tbx_t *tbx_idx;
unsigned char read_one_record_only;
hts_idx_t *bcf_idx;
bcf_hdr_t *header;
hts_itr_t *itr;
Expand Down
74 changes: 67 additions & 7 deletions htslib/vcf.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,14 +63,22 @@ extern "C" {
#define BCF_HT_INT 1
#define BCF_HT_REAL 2
#define BCF_HT_STR 3
#define BCF_HT_LONG (BCF_HT_INT | 0x100) // BCF_HT_INT, but for int64_t values; VCF only!
#define BCF_HT_UINT 4
#define BCF_HT_CHAR 5
#define BCF_HT_INT64 6
#define BCF_HT_LONG BCF_HT_INT64 // BCF_HT_INT, but for int64_t values; VCF only!
#define BCF_HT_UINT64 7
#define BCF_HT_VOID 8
#define BCF_HT_DOUBLE 9
#define BCF_NUM_HT_TYPES 10

#define BCF_VL_FIXED 0 // variable length
#define BCF_VL_VAR 1
#define BCF_VL_A 2
#define BCF_VL_G 3
#define BCF_VL_R 4

#define BCF_VL_P 5 //ploidy
#define BCF_VL_Phased_Ploidy 6 //ploidy with phase
/* === Dictionary ===

The header keeps three dictionaries. The first keeps IDs in the
Expand All @@ -87,6 +95,10 @@ extern "C" {
#define BCF_DT_CTG 1
#define BCF_DT_SAMPLE 2

#define BCF_V_2_1_HEADER_MAGIC_STRING "BCF\2\1"
#define BCF_V_2_2_HEADER_MAGIC_STRING "BCF\2\2"
#define BCF_HEADER_MAGIC_STRING_LENGTH 5

// Complete textual representation of a header line
typedef struct bcf_hrec_t {
int type; // One of the BCF_HL_* type
Expand Down Expand Up @@ -144,10 +156,12 @@ extern uint8_t bcf_type_shift[];
#define VCF_INDEL (1<<2)
#define VCF_OTHER (1<<3)
#define VCF_BND (1<<4) // breakend
#define VCF_OVERLAP (1<<5) // overlapping deletion, ALT=*
#define VCF_OVERLAP (1<<5) // overlapping deletion, ALT=*
#define VCF_SPANNING_DELETION VCF_OVERLAP
#define VCF_INS (1<<6) // implies VCF_INDEL
#define VCF_DEL (1<<7) // implies VCF_INDEL
#define VCF_ANY (VCF_SNP|VCF_MNP|VCF_INDEL|VCF_OTHER|VCF_BND|VCF_OVERLAP|VCF_INS|VCF_DEL) // any variant type (but not VCF_REF)
#define VCF_NON_REF (1<<8)

typedef struct bcf_variant_t {
int type, n; // variant type and the number of bases affected, negative for deletions
Expand Down Expand Up @@ -237,6 +251,7 @@ typedef struct bcf1_t {
hts_pos_t rlen; // length of REF
int32_t rid; // CHROM
float qual; // QUAL
hts_pos_t m_end_point; //END - must be after QUAL due to a memcpy() in vcf.c
uint32_t n_info:16, n_allele:16;
uint32_t n_fmt:8, n_sample:24;
kstring_t shared, indiv;
Expand Down Expand Up @@ -338,6 +353,7 @@ typedef struct bcf1_t {
*/
HTSLIB_EXPORT
bcf_hdr_t *bcf_hdr_read(htsFile *fp) HTS_RESULT_USED;
bcf_hdr_t *bcf_hdr_read_required_sample_line(htsFile *hfp, const uint8_t is_sample_line_required);

/**
* bcf_hdr_set_samples() - for more efficient VCF parsing when only one/few samples are needed
Expand Down Expand Up @@ -375,6 +391,16 @@ typedef struct bcf1_t {
HTSLIB_EXPORT
int bcf_hdr_write(htsFile *fp, bcf_hdr_t *h) HTS_RESULT_USED;


/*
* Serialize BCF header into buffer
*
* Returns new offset value in buffer if the new data fits within the buffer capacity,
* else returns the same offset value without modifying the buffer
*/
size_t bcf_hdr_serialize(bcf_hdr_t* h, uint8_t* buffer, size_t offset, const size_t capacity, const uint8_t is_bcf, const uint8_t keep_idx_fields);

size_t bcf_hdr_deserialize(bcf_hdr_t* h, const uint8_t* buffer, const size_t offset, const size_t capacity, const uint8_t is_bcf);
/**
* Parse VCF line contained in kstring and populate the bcf1_t struct
* The line must not end with \n or \r characters.
Expand All @@ -396,6 +422,27 @@ typedef struct bcf1_t {
HTSLIB_EXPORT
int vcf_format(const bcf_hdr_t *h, const bcf1_t *v, kstring_t *s);

/*
* Same as vcf_format, but for bcfs
*
* Returns new offset value in buffer if the new data fits within the buffer capacity,
* else returns the same offset value without modifying the buffer
*
* If vcf, then the hdr and tmp pointers must be valid. For bcfs, they might be null
*/
size_t bcf_serialize(bcf1_t* v, uint8_t* buffer, size_t offset, const size_t capacity, const uint8_t is_bcf, const bcf_hdr_t* hdr, kstring_t* tmp);
/*
* Same as vcf_parse, but for bcfs
*
* Returns new offset value in buffer if a full vcf record is read,
* else returns the same offset value
*
* If vcf, then the hdr and tmp pointers must be valid. For bcfs, they might be null
*
* Note that vcf parsing modifies the buffer (tokenize function)
*/
size_t bcf_deserialize(bcf1_t* v, uint8_t* buffer, const size_t offset, const size_t capacity, const uint8_t is_bcf, const bcf_hdr_t* hdr);

/// Read next VCF or BCF record
/** @param fp The file to read the record from
@param h The header for the vcf/bcf file
Expand Down Expand Up @@ -468,7 +515,7 @@ set to one of BCF_ERR* codes and must be checked before calling bcf_write().
*/
HTSLIB_EXPORT
bcf_hdr_t *vcf_hdr_read(htsFile *fp) HTS_RESULT_USED;

bcf_hdr_t *vcf_hdr_read_required_sample_line(htsFile *fp, const uint8_t is_sample_line_required);
/// Write a VCF format header
/** @param fp Output file
@param h The header to write
Expand Down Expand Up @@ -651,6 +698,8 @@ set to one of BCF_ERR* codes and must be checked before calling bcf_write().
/** The following functions are for internal use and should rarely be called directly */
HTSLIB_EXPORT
int bcf_hdr_parse(bcf_hdr_t *hdr, char *htxt);
int bcf_hdr_parse_required_sample_line(bcf_hdr_t *hdr, char *htxt, size_t* hdr_length,
const uint8_t is_sample_line_required);

/// Synchronize internal header structures
/** @param h Header
Expand Down Expand Up @@ -988,6 +1037,8 @@ set to one of BCF_ERR* codes and must be checked before calling bcf_write().
return bcf_update_info(hdr, line, key, values, n, BCF_HT_LONG);
}

void bcf_set_end_point_from_info(const bcf_hdr_t* hdr, bcf1_t* line);

/*
* bcf_update_format_*() - functions for updating FORMAT fields
* @values: pointer to the array of values, the same number of elements
Expand Down Expand Up @@ -1248,6 +1299,7 @@ set to one of BCF_ERR* codes and must be checked before calling bcf_write().
#define bcf_hdr_id2coltype(hdr,type,int_id) (uint32_t)((hdr)->id[BCF_DT_ID][int_id].val->info[type] & 0xf)
#define bcf_hdr_idinfo_exists(hdr,type,int_id) ((int_id)>=0 && (int_id)<(hdr)->n[BCF_DT_ID] && (hdr)->id[BCF_DT_ID][int_id].val && bcf_hdr_id2coltype((hdr),(type),(int_id))!=0xf)
#define bcf_hdr_id2hrec(hdr,dict_type,col_type,int_id) ((hdr)->id[(dict_type)==BCF_DT_CTG?BCF_DT_CTG:BCF_DT_ID][int_id].val->hrec[(dict_type)==BCF_DT_CTG?0:(col_type)])
uint64_t bcf_hdr_id2contig_length(const bcf_hdr_t* hdr, const int id);
/// Convert BCF FORMAT data to string form
/**
* @param s kstring to write into
Expand Down Expand Up @@ -1520,7 +1572,7 @@ static inline int bcf_format_gt(bcf_fmt_t *fmt, int isample, kstring_t *str)
return e == 0 ? 0 : -1;
}

static inline int bcf_enc_size(kstring_t *s, int size, int type)
static inline int bcf_enc_size(kstring_t *s, size_t size, int type)
{
// Most common case is first
if (size < 15) {
Expand All @@ -1546,11 +1598,18 @@ static inline int bcf_enc_size(kstring_t *s, int size, int type)
*p++ = 1<<4|BCF_BT_INT16;
i16_to_le(size, p);
s->l += 4;
} else {
}
else if(size <= INT32_MAX){
*p++ = 1<<4|BCF_BT_INT32;
i32_to_le(size, p);
s->l += 6;
}
else{
*p++ = 1<<4|BCF_BT_INT64;
i64_to_le(size,p);
s->l += 10;
return -1;
}
}
return 0;
}
Expand All @@ -1559,7 +1618,8 @@ static inline int bcf_enc_inttype(long x)
{
if (x <= BCF_MAX_BT_INT8 && x >= BCF_MIN_BT_INT8) return BCF_BT_INT8;
if (x <= BCF_MAX_BT_INT16 && x >= BCF_MIN_BT_INT16) return BCF_BT_INT16;
return BCF_BT_INT32;
if (x <= BCF_MAX_BT_INT32 && x >= BCF_MIN_BT_INT32) return BCF_BT_INT32;
return BCF_BT_INT64;
}

static inline int bcf_enc_int1(kstring_t *s, int32_t x)
Expand Down
10 changes: 7 additions & 3 deletions synced_bcf_reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -420,8 +420,12 @@ static void bcf_sr_destroy1(bcf_sr_t *reader)
free(reader->fname);
if ( reader->tbx_idx ) tbx_destroy(reader->tbx_idx);
if ( reader->bcf_idx ) hts_idx_destroy(reader->bcf_idx);
bcf_hdr_destroy(reader->header);
hts_close(reader->file);
if (reader->header) {
bcf_hdr_destroy(reader->header);
}
if (reader->file) {
hts_close(reader->file);
}
if ( reader->itr ) tbx_itr_destroy(reader->itr);
int j;
for (j=0; j<reader->mbuffer; j++)
Expand Down Expand Up @@ -693,7 +697,7 @@ static int _reader_fill_buffer(bcf_srs_t *files, bcf_sr_t *reader)
reader->nbuffer++;

if ( reader->buffer[reader->nbuffer]->rid != reader->buffer[1]->rid ) break;
if ( reader->buffer[reader->nbuffer]->pos != reader->buffer[1]->pos ) break; // the buffer is full
if ( reader->read_one_record_only || reader->buffer[reader->nbuffer]->pos != reader->buffer[1]->pos ) break; // the buffer is full
}
if ( ret<0 )
{
Expand Down
Loading