Skip to content

Commit 8de2d22

Browse files
ariostas and ianna authored
feat: implement caching support for RNTuple reading (#1513)
* Implemented reading cache for RNTuples
* Fixed issue with nonzero entry_start for deferred columns
* Better test
* Reverted granularity of page ranges
* Perform all reading operations in place
* A bit of cleanup
* Move cache to the cluster level
* Fix cuda tests
* Copilot fixes

---------

Co-authored-by: Ianna Osborne <ianna.osborne@cern.ch>
1 parent 2751f8a commit 8de2d22

File tree

5 files changed

+239
-99
lines changed

5 files changed

+239
-99
lines changed

src/uproot/behaviors/RNTuple.py

Lines changed: 14 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
import uproot.language.python
2424
import uproot.source.chunk
2525
from uproot._util import no_filter, unset
26+
from uproot.behaviors.TBranch import _regularize_array_cache
2627

2728

2829
def iterate(
@@ -615,7 +616,7 @@ def arrays(
615616
entry_start=None,
616617
entry_stop=None,
617618
decompression_executor=None, # TODO: Not implemented yet
618-
array_cache="inherit", # TODO: Not implemented yet
619+
array_cache="inherit",
619620
library="ak", # TODO: Not implemented yet
620621
backend="cpu",
621622
interpreter="cpu",
@@ -659,7 +660,7 @@ def arrays(
659660
is used. (Not implemented yet.)
660661
array_cache ("inherit", None, MutableMapping, or memory size): Cache of arrays;
661662
if "inherit", use the file's cache; if None, do not use a cache;
662-
if a memory size, create a new cache of this size. (Not implemented yet.)
663+
if a memory size, create a new cache of this size.
663664
library (str or :doc:`uproot.interpretation.library.Library`): The library
664665
that is used to represent arrays. Options are ``"np"`` for NumPy,
665666
``"ak"`` for Awkward Array, and ``"pd"`` for Pandas. (Not implemented yet.)
@@ -725,6 +726,8 @@ def arrays(
725726
[c.num_entries for c in clusters[start_cluster_idx:stop_cluster_idx]]
726727
)
727728

729+
array_cache = _regularize_array_cache(array_cache, self.ntuple._file)
730+
728731
form, field_path = self.to_akform(
729732
filter_name=filter_name,
730733
filter_typename=filter_typename,
@@ -747,17 +750,22 @@ def arrays(
747750
clusters_datas,
748751
start_cluster_idx,
749752
stop_cluster_idx,
750-
pad_missing_element=True,
751753
)
752754

753755
for key in target_cols:
754756
if "column" in key:
755757
key_nr = int(key.split("-")[1])
758+
# Find how many elements should be padded at the beginning
759+
n_padding = self.ntuple.column_records[key_nr].first_element_index
760+
n_padding -= cluster_starts[start_cluster_idx]
761+
n_padding = max(n_padding, 0)
756762
if interpreter == "cpu":
757-
content = self.ntuple.read_col_pages(
763+
content = self.ntuple.read_cluster_range(
758764
key_nr,
759-
range(start_cluster_idx, stop_cluster_idx),
760-
pad_missing_element=True,
765+
start_cluster_idx,
766+
stop_cluster_idx,
767+
missing_element_padding=n_padding,
768+
array_cache=array_cache,
761769
)
762770
elif interpreter == "gpu" and backend == "cuda":
763771
content = content_dict[key_nr]

0 commit comments

Comments
 (0)