Skip to content

Commit

Permalink
Start work on interpretations; read at least one basket. (#16)
Browse files Browse the repository at this point in the history
* Sketch of what's needed in the new Interpretation.

* Filling in the AsDtype Interpretation.

* The AsDtype is probably about right.

* Cleaned up.

* Tricky cache bug: don't take objects from cache whose files are closed.

* Using the same code to read baskets as recovery-baskets.

* Placeholder for compressed baskets.

* Dealing with compressed buffers and baskets with offsets.

* More conveniences: context managers are passed down from all objects, and objects in files can be accessed with bar ("|") syntax.

* Reading embedded TBaskets can't be quite the same code path as regular TBaskets.

* Basket recovery works.

* Avoid pejorative language 'recover'; instead use 'embedded'.

* Cleaned up; done with PR.

* Fix flake8.
  • Loading branch information
jpivarski authored May 30, 2020
1 parent 337e3ae commit 8475076
Show file tree
Hide file tree
Showing 20 changed files with 1,392 additions and 158 deletions.
137 changes: 137 additions & 0 deletions tests/test_0016-interpretations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot4/blob/master/LICENSE

from __future__ import absolute_import

import sys
import json

try:
from io import StringIO
except ImportError:
from StringIO import StringIO

import numpy
import pytest
import skhep_testdata

import uproot4


def test_get_key():
    """Look up objects by slash-separated keys, directly and in two steps.

    Each case is a chain of subscripts applied starting from the open file,
    paired with the ``name`` the resulting object must report.
    """
    root_path = skhep_testdata.data_path("uproot-mc10events.root")
    cases = [
        (("Events",), "Events"),
        (("Events/Info",), "Info"),
        (("Events/Info/evtNum",), "evtNum"),
        (("Events", "Info/evtNum"), "evtNum"),
        (("Events", "/Info/evtNum"), "evtNum"),
        (("Events/evtNum",), "evtNum"),
        (("Events", "evtNum"), "evtNum"),
        (("Events", "/Info"), "Info"),
        (("Events", "/Info/"), "Info"),
    ]
    with uproot4.open(root_path) as f:
        for steps, expected_name in cases:
            found = f
            for step in steps:
                found = found[step]
            assert found.name == expected_name

        # This particular lookup is expected to fail with KeyError.
        with pytest.raises(KeyError):
            f["Events"]["/evtNum"]


def test_basket_data():
    """Check the key position and raw contents of basket 3 of ``sample/i4``."""
    root_path = skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root")
    expected_values = list(range(6, 13))  # 6, 7, ..., 12
    with uproot4.open(root_path) as f:
        assert f["sample/i4"].basket_key(3).fSeekKey == 35042
        # Reinterpret the basket's data as big-endian 4-byte integers.
        values = f["sample/i4"].basket(3).data.view(">i4").tolist()
        assert values == expected_values


def test_compressed():
    """Check the data and byte offsets of the first ``Events/Muon.q`` basket."""
    expected_charges = [-1, -1, -1, 1, 1, -1, -1, -1, 1, -1, -1, -1, -1, 1]
    expected_offsets = [0, 4, 4, 16, 28, 28, 32, 52, 52, 56, 56]
    root_path = skhep_testdata.data_path("uproot-mc10events.root")
    with uproot4.open(root_path) as f:
        basket = f["Events/Muon.q"].basket(0)
        # Reinterpret the basket's data as big-endian 4-byte integers.
        charges = basket.data.view(">i4").tolist()
        assert charges == expected_charges
        offsets = basket.byte_offsets.tolist()
        assert offsets == expected_offsets


def test_read_all():
    """Open a file with ``minimal_ttree_metadata=False`` and fetch one branch.

    There are no assertions; the test passes if nothing raises.
    """
    with uproot4.open(
        skhep_testdata.data_path("uproot-issue327.root"),
        minimal_ttree_metadata=False,
    ) as f:
        f["DstTree/fTracks.fCharge"]


@pytest.mark.parametrize("mini", [False, True])
def test_recovery(mini):
    """Read basket 0 of a flat branch and of a jagged branch.

    Runs once for each value of ``mini``, which is passed through as
    ``minimal_ttree_metadata``. In both files the branch's entry offsets
    must be ``[0, num_entries]``.
    """
    # flat array to recover:
    expected_mh = [
        124.0,
        124.09089660644531,
        124.18180084228516,
        124.27269744873047,
        124.36360168457031,
        124.45449829101562,
        124.54550170898438,
        124.63639831542969,
        124.72730255126953,
        124.81819915771484,
    ]
    flat_spec = "file:{0} | nllscan/mH".format(
        skhep_testdata.data_path("uproot-issue21.root")
    )
    with uproot4.open(flat_spec, minimal_ttree_metadata=mini) as branch:
        flat_basket = branch.basket(0)
        # Reinterpret as big-endian doubles and compare the first ten.
        assert flat_basket.data.view(">f8").tolist()[:10] == expected_mh
        assert flat_basket.byte_offsets is None
        assert branch.entry_offsets == [0, branch.num_entries]

    # jagged arrays to recover:

    # uproot-issue327.root DstTree: fTracks.fCharge
    # uproot-issue232.root fTreeV0: V0s.fV0pt MCparticles.nbodies
    # uproot-issue187.root fTreeV0: V0s.fV0pt MCparticles.nbodies
    # uproot-from-geant4.root Details: numgood, TrackedRays: Event phi
    expected_charge = [1, -1, 1, 1, -1, -1, 1, -1, -1, -1]
    expected_offsets = [0, 2, 37, 56, 60, 81, 82, 112, 112, 112]
    jagged_spec = "file:{0} | DstTree/fTracks.fCharge".format(
        skhep_testdata.data_path("uproot-issue327.root")
    )
    with uproot4.open(jagged_spec, minimal_ttree_metadata=mini) as branch:
        jagged_basket = branch.basket(0)
        # Reinterpret as 1-byte signed integers and compare the first ten.
        assert jagged_basket.data.view("i1")[:10].tolist() == expected_charge
        assert jagged_basket.byte_offsets[:10].tolist() == expected_offsets
        assert branch.entry_offsets == [0, branch.num_entries]
38 changes: 38 additions & 0 deletions uproot4/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
object_cache = LRUCache(100)
array_cache = LRUArrayCache("100 MB")

import uproot4.interpret

library = "ak"

from uproot4.source.memmap import MemmapSource
from uproot4.source.file import FileSource
from uproot4.source.http import HTTPSource
Expand Down Expand Up @@ -50,6 +54,7 @@

# FIXME: add uproot4.models.TRef


import pkgutil
import uproot4.behaviors

Expand Down Expand Up @@ -81,3 +86,36 @@ def behavior_of(classname):
]

del pkgutil


class KeyInFileError(KeyError):
    """
    A ``KeyError`` that also records which file was searched.

    Args:
        key: The key that was not found.
        file_path: Path of the file in which the key was looked up.
        cycle: ``None`` (no cycle mentioned in the message), the string
            ``"any"`` (message says any cycle number was accepted), or a
            specific cycle value to include in the message.
        because (str): Optional explanation appended to the message; the
            empty string means no explanation.
    """

    def __init__(self, key, file_path, cycle=None, because=""):
        super(KeyInFileError, self).__init__(key)
        self.key = key
        self.file_path = file_path
        self.cycle = cycle
        self.because = because

    def __reduce__(self):
        # The inherited reduction rebuilds the exception from ``self.args``,
        # which holds only ``key``; since ``file_path`` is a required argument
        # that would fail when unpickling or copying. Supply the full set of
        # constructor arguments instead.
        return (
            type(self),
            (self.key, self.file_path, self.cycle, self.because),
        )

    def __str__(self):
        """Return a two-line message: what was not found, then the file."""
        if self.because == "":
            because = ""
        else:
            because = " because " + self.because

        if self.cycle == "any":
            template = "not found: {0} (with any cycle number){1}\nin file {2}"
            return template.format(repr(self.key), because, self.file_path)

        if self.cycle is None:
            template = "not found: {0}{1}\nin file {2}"
            return template.format(repr(self.key), because, self.file_path)

        template = "not found: {0} with cycle {1}{2}\nin file {3}"
        return template.format(
            repr(self.key), self.cycle, because, self.file_path
        )


from uproot4._util import no_filter
13 changes: 10 additions & 3 deletions uproot4/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
import glob

try:
from urlparse import urlparse
except ImportError:
from urllib.parse import urlparse
except ImportError:
from urlparse import urlparse

import numpy

Expand Down Expand Up @@ -82,9 +82,13 @@ def _regularize_filter_regex_flags(flags):
return flagsbyte


def no_filter(x):
    """Accept everything: return True regardless of ``x``."""
    return True


def regularize_filter(filter):
if filter is None:
return lambda x: True
return no_filter
elif callable(filter):
return filter
elif isstr(filter):
Expand Down Expand Up @@ -112,8 +116,10 @@ def regularize_filter(filter):
def path_to_source_class(file_path, options):
if isinstance(file_path, getattr(os, "PathLike", ())):
file_path = os.fspath(file_path)

elif hasattr(file_path, "__fspath__"):
file_path = file_path.__fspath__()

elif file_path.__class__.__module__ == "pathlib":
import pathlib

Expand All @@ -124,6 +130,7 @@ def path_to_source_class(file_path, options):
os.name == "nt" and _windows_absolute_path_pattern.match(file_path) is not None
)
parsed_url = urlparse(file_path)

if (
parsed_url.scheme == "file"
or len(parsed_url.scheme) == 0
Expand Down
Loading

0 comments on commit 8475076

Please sign in to comment.