Skip to content

Commit 20ebdc7

Browse files
committed
.get_chromosome_rle
1 parent ca4747e commit 20ebdc7

File tree

4 files changed

+53
-7
lines changed

4 files changed

+53
-7
lines changed

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ useDynLib(SeqArray,
99
SEQ_SetSpaceChrom, SEQ_SetSpaceAnnotID,
1010
SEQ_SplitSelection, SEQ_SplitSelectionX,
1111
SEQ_GetSpace, SEQ_GetSpaceSample, SEQ_GetSpaceVariant,
12+
SEQ_Get_ChromRLE,
1213
SEQ_Summary, SEQ_System,
1314
SEQ_VCF_NumLines, SEQ_VCF_Split, SEQ_VCF_Parse,
1415
SEQ_ToVCF_Init, SEQ_ToVCF_Done,

R/Internal.R

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,17 @@ process_count <- 1L
255255

256256

257257

258+
#######################################################################
259+
# Get RLE-coded chromosome
260+
#
261+
.get_chromosome_rle <- function(gdsfile)
{
    # the native routine returns a two-element list:
    #   [[1]] run lengths, [[2]] chromosome codes (run values)
    rle_parts <- .Call(SEQ_Get_ChromRLE, gdsfile)
    ans <- list(lengths=rle_parts[[1L]], values=rle_parts[[2L]])
    # tag the list so that it is treated as a base-R "rle" object
    class(ans) <- "rle"
    ans
}
266+
267+
268+
258269
#######################################################################
259270
# Open and close a connection,
260271
# Please always call '.close_conn' after '.open_bin' and '.open_text'

src/Index.h

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ class ErrSeqArray;
7676
template<typename TYPE> class COREARRAY_DLL_LOCAL C_RLE
7777
{
7878
public:
79+
friend class CChromIndex;
80+
7981
/// constructor
8082
C_RLE()
8183
{
@@ -249,25 +251,26 @@ class COREARRAY_DLL_LOCAL CChromIndex
249251

250252
typedef vector<TRange> TRangeList;
251253

254+
/// map to TRangeList from chromosome coding
255+
map<string, TRangeList> Map;
256+
252257
/// constructor
253258
CChromIndex();
254-
255259
/// clear
256260
void Clear();
257-
258261
/// represent chromosome codes as an RLE object in Map
259262
void AddChrom(PdGDSFolder Root);
260-
261263
/// the total length of a TRangeList object
262264
size_t RangeTotalLength(const TRangeList &RngList);
263265

264266
/// whether it is empty
265267
inline bool Empty() const { return Map.empty(); }
266268
/// return chromosome coding with the index
267269
inline const string &operator [](size_t idx) { return RleChr[idx]; }
268-
269-
/// map to TRangeList from chromosome coding
270-
map<string, TRangeList> Map;
270+
/// return the RLE representation of chromosome: value
271+
inline vector<string> &RLE_Values() { return RleChr.Values; }
272+
/// return the RLE representation of chromosome: length
273+
inline vector<C_UInt32> &RLE_Lengths() { return RleChr.Lengths; }
271274

272275
protected:
273276
/// indexing chromosome

src/SeqArray.cpp

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1363,7 +1363,7 @@ COREARRAY_DLL_EXPORT SEXP SEQ_ClearVarMap(SEXP gdsfile)
13631363

13641364

13651365
// ===========================================================
1366-
// Clear VarMap in a GDS file
1366+
// Get or clear the memory buffer storing variant positions
13671367
// ===========================================================
13681368

13691369
COREARRAY_DLL_EXPORT SEXP SEQ_BufferPosition(SEXP gdsfile, SEXP clear)
@@ -1385,6 +1385,36 @@ COREARRAY_DLL_EXPORT SEXP SEQ_BufferPosition(SEXP gdsfile, SEXP clear)
13851385

13861386

13871387

1388+
// ===========================================================
1389+
// Get RLE-coded chromosome
1390+
// ===========================================================
1391+
1392+
/// Return the RLE-coded chromosome of a GDS file as an R list
/// \param gdsfile  the R object identifying the GDS file (passed to GetFileInfo)
/// \return a list of length two: [[1]] integer vector of run lengths,
///         [[2]] character vector of run values (chromosome codes)
/// Throws ErrSeqArray when the two RLE vectors differ in length.
COREARRAY_DLL_EXPORT SEXP SEQ_Get_ChromRLE(SEXP gdsfile)
{
	COREARRAY_TRY
		CFileInfo &File = GetFileInfo(gdsfile);
		CChromIndex &Chrom = File.Chromosome();
		vector<string> &V = Chrom.RLE_Values();
		vector<C_UInt32> &L = Chrom.RLE_Lengths();
		if (V.size() != L.size())
			throw ErrSeqArray("Invalid RLE of chromosome!");

		rv_ans = PROTECT(NEW_LIST(2));

		// run lengths; 'l' is reachable from the protected list once stored
		SEXP l = NEW_INTEGER(L.size());
		SET_ELEMENT(rv_ans, 0, l);
		// &L[0] is undefined for an empty vector, so copy only when there
		// is data; NOTE(review): the raw copy assumes that C_UInt32 and int
		// have the same size -- confirm
		if (!L.empty())
			memcpy(INTEGER(l), &L[0], sizeof(int)*L.size());

		// run values (chromosome codes)
		SEXP v = NEW_CHARACTER(V.size());
		SET_ELEMENT(rv_ans, 1, v);
		for (size_t i=0; i < V.size(); i++)
		{
			const string &s = V[i];
			// Rf_mkCharLen takes an int length
			SET_STRING_ELT(v, i, Rf_mkCharLen(s.c_str(), (int)s.size()));
		}

		UNPROTECT(1);
	COREARRAY_CATCH
}
1415+
1416+
1417+
13881418
// ===========================================================
13891419
// Get system configuration
13901420
// ===========================================================
@@ -1677,6 +1707,7 @@ COREARRAY_DLL_EXPORT void R_init_SeqArray(DllInfo *info)
16771707

16781708
CALL(SEQ_IntAssign, 2), CALL(SEQ_AppendFill, 3),
16791709
CALL(SEQ_ClearVarMap, 1), CALL(SEQ_BufferPosition, 2),
1710+
CALL(SEQ_Get_ChromRLE, 1),
16801711

16811712
CALL(SEQ_bgzip_create, 1),
16821713
CALL(SEQ_ToVCF_Init, 6), CALL(SEQ_ToVCF_Done, 0),

0 commit comments

Comments
 (0)