Skip to content

Commit 63c9a30

Browse files
committed
return RLE chromosome
1 parent 20ebdc7 commit 63c9a30

File tree

4 files changed

+46
-2
lines changed

4 files changed

+46
-2
lines changed

NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ UTILITIES
1515

1616
o new argument 'digest' in `seqRecompress()` to add MD5 hash codes
1717

18+
o `seqGetData(, "$chromosome")` returns the chromosome codes as an
19+
'S4Vectors::Rle' object
20+
1821
BUG FIXES
1922

2023
o `seqBlockApply()` should recover the filter when the user-specified function fails

man/seqBlockApply.Rd

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ seqBlockApply(gdsfile, var.name, FUN, margin=c("by.variant"),
5555
\code{"annotation/format/@VARIABLE_NAME"} are used to obtain the index
5656
associated with these variables.
5757

58+
\code{"$chromosome"} returns the chromosome codes as an \code{S4Vectors::Rle} object.
59+
5860
\code{"$dosage"} is also allowed for the dosages of reference allele (integer:
5961
0, 1, 2 and NA for diploid genotypes).
6062

man/seqGetData.Rd

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ components) with a class name \code{SeqVarDataList}.
3838
\code{"annotation/format/@VARIABLE_NAME"} are used to obtain the index
3939
associated with these variables.
4040

41+
\code{"$chromosome"} returns the chromosome codes as an \code{S4Vectors::Rle} object.
42+
4143
\code{"$dosage"} is also allowed for the dosages of reference allele (integer:
4244
0, 1, 2 and NA for diploid genotypes).
4345

src/GetData.cpp

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//
33
// GetData.cpp: Get data from a SeqArray GDS file
44
//
5-
// Copyright (C) 2015-2024 Xiuwen Zheng
5+
// Copyright (C) 2015-2025 Xiuwen Zheng
66
//
77
// This file is part of SeqArray.
88
//
@@ -58,6 +58,7 @@ static const string VAR_GENO_INDEX("@genotype");
5858
static const string VAR_PHASE("phase");
5959

6060
// variable list: internally generated
61+
static const string VAR_CHROM_RLE("$chromosome");
6162
static const string VAR_DOSAGE("$dosage");
6263
static const string VAR_DOSAGE_ALT("$dosage_alt");
6364
static const string VAR_DOSAGE_ALT2("$dosage_alt2");
@@ -152,6 +153,38 @@ static SEXP get_chrom(CFileInfo &File, TVarMap &Var, void *param)
152153
return rv_ans;
153154
}
154155

156+
/// get RLE-coded chromosome from '$chromosome'
157+
static SEXP get_chrom_rle(CFileInfo &File, TVarMap &Var, void *param)
158+
{
159+
int n = File.VariantSelNum();
160+
SEXP rv_ans = PROTECT(NEW_CHARACTER(n));
161+
if (n > 0)
162+
{
163+
CChromIndex &Chrom = File.Chromosome();
164+
TSelection &Sel = File.Selection();
165+
C_BOOL *s = Sel.pVariant + Sel.varStart;
166+
size_t p = 0, i = Sel.varStart;
167+
SEXP lastR = Rf_mkChar("");
168+
string lastS;
169+
for (; n > 0; i++)
170+
{
171+
if (*s++)
172+
{
173+
const string &ss = Chrom[i];
174+
if (ss != lastS)
175+
{
176+
lastS = ss;
177+
lastR = Rf_mkChar(ss.c_str());
178+
}
179+
SET_STRING_ELT(rv_ans, p++, lastR);
180+
n--;
181+
}
182+
}
183+
}
184+
UNPROTECT(1);
185+
return rv_ans;
186+
}
187+
155188
/// get variant-specific data from node without indexing (e.g., variant.id)
156189
static SEXP get_data_1d(CFileInfo &File, TVarMap &Var, void *param)
157190
{
@@ -1141,7 +1174,11 @@ COREARRAY_DLL_LOCAL TVarMap &VarGetStruct(CFileInfo &File, const string &name)
11411174
} else
11421175
// ===========================================================
11431176
// internally generated variables
1144-
if (name == VAR_DOSAGE)
1177+
if (name == VAR_CHROM_RLE)
1178+
{
1179+
vm.Init(File, VAR_CHROM, get_chrom_rle);
1180+
CHECK_VARIANT_ONE_DIMENSION
1181+
} else if (name == VAR_DOSAGE)
11451182
{
11461183
vm.Func = get_dosage;
11471184
} else if (name == VAR_DOSAGE_ALT)

0 commit comments

Comments
 (0)