Skip to content

Commit

Permalink
Fill out the array-fetching interface. (#18)
Browse files Browse the repository at this point in the history
* Fill out the array-fetching interface.

* Pandas merging.

* Try using conda without a custom action.

* Explicitly 'conda init bash'.

* Use Henry's suggested syntax.

* See if it will install awkward1.

* Reading JaggedArrays (as NumPy).

* Fix flake8 error and take Henry's suggestion on build-test.yml.

* Jagged arrays in Awkward and Pandas.

* Works for std::vectors (i.e. jagged arrays with byte headers).

* Implemented but have not tested TTree.arrays.

* Tested TTree.arrays with a mixture of flat and jagged arrays.

* Default behavior is to not zip them.

* Aliases and cache work.

* Before rename 'interpret' -> 'interpretation'.

* Renamed 'interpret' -> 'interpretation'.

* Computable names (for fAliases) work.
  • Loading branch information
jpivarski authored Jun 3, 2020
1 parent ef5109e commit 59841bb
Show file tree
Hide file tree
Showing 19 changed files with 2,566 additions and 331 deletions.
27 changes: 15 additions & 12 deletions .github/workflows/build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,46 +17,49 @@ jobs:
steps:
- uses: "actions/checkout@v2"

- name: "Python ${{ matrix.python-version }}"
uses: "goanpeca/setup-miniconda@v1"
with:
auto-update-conda: true

- name: "create environment"
- name: "environment for ${{ matrix.python-version }}"
run: |
eval "$(conda shell.bash hook)"
conda create -n testing python=${{ matrix.python-version }}
conda init bash
conda config --add channels conda-forge
conda info
- name: "install most dependencies"
shell: "bash -l {0}"
run: |
eval "$(conda shell.bash hook)"
conda activate testing
conda env list
conda install numpy pytest flake8
conda install numpy pandas pytest flake8
pip install scikit-hep-testdata
conda list
- name: "install Awkward"
run: |
eval "$(conda shell.bash hook)"
conda activate testing
conda env list
pip install awkward1
conda list
- name: "install XRootD"
shell: "bash -l {0}"
if: "${{ matrix.python-version != 3.5 }}"
run: |
eval "$(conda shell.bash hook)"
conda activate testing
conda env list
conda install xrootd
conda list
- name: "flake8"
shell: "bash -l {0}"
run: |
eval "$(conda shell.bash hook)"
conda activate testing
conda env list
python -m flake8
- name: "pytest"
shell: "bash -l {0}"
run: |
eval "$(conda shell.bash hook)"
conda activate testing
conda env list
python -m pytest -vv tests
2 changes: 2 additions & 0 deletions requirements-test.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
pytest
flake8
scikit-hep-testdata
pandas
awkward1
51 changes: 31 additions & 20 deletions tests/test_0017-multi-basket-multi-branch-fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@
import skhep_testdata

import uproot4
import uproot4.interpret.numerical
import uproot4.interpret.library
import uproot4.interpretation.numerical
import uproot4.interpretation.library
import uproot4.source.futures


def test_any_basket():
interpretation = uproot4.interpret.numerical.AsDtype(">i4")
interpretation = uproot4.interpretation.numerical.AsDtype(">i4")

with uproot4.open(
skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root")
Expand Down Expand Up @@ -69,12 +69,12 @@ def test_any_basket():


def test_stitching_arrays():
interpretation = uproot4.interpret.numerical.AsDtype("i8")
interpretation = uproot4.interpretation.numerical.AsDtype("i8")
expectation = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
basket_arrays = [[0, 1, 2, 3, 4], [5, 6], [], [7, 8, 9], [10], [11, 12, 13, 14]]
basket_arrays = [numpy.array(x) for x in basket_arrays]
entry_offsets = numpy.array([0, 5, 7, 7, 10, 11, 15])
library = uproot4.interpret.library._libraries["np"]
library = uproot4.interpretation.library._libraries["np"]

for start in range(16):
for stop in range(15, -1, -1):
Expand All @@ -84,11 +84,22 @@ def test_stitching_arrays():
assert expectation[start:stop] == actual.tolist()


def _names_entries_to_ranges_or_baskets(self, branch_names, entry_start, entry_stop):
    """Flatten the basket lookups of several branches into one list.

    Each name in ``branch_names`` is looked up with ``self[name]``; the
    resulting branch is asked for ``entries_to_ranges_or_baskets(entry_start,
    entry_stop)``, which is iterated as ``(basket_num, range_or_basket)``
    pairs.

    Returns a list of ``(name, branch, basket_num, range_or_basket)`` tuples,
    in the order of ``branch_names`` and, within a branch, in the order the
    branch produced them.
    """
    collected = []
    for branch_name in branch_names:
        # Look the branch up once per name and reuse it for every tuple.
        found = self[branch_name]
        pairs = found.entries_to_ranges_or_baskets(entry_start, entry_stop)
        collected.extend(
            (branch_name, found, number, item) for number, item in pairs
        )
    return collected


def test_names_entries_to_ranges_or_baskets():
with uproot4.open(
skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root")
)["sample"] as sample:
out = sample._names_entries_to_ranges_or_baskets(["i4"], 0, 30)
out = _names_entries_to_ranges_or_baskets(sample, ["i4"], 0, 30)
assert all(x[0] == "i4" for x in out)
assert [x[2] for x in out] == [0, 1, 2, 3, 4]
assert [x[3] for x in out] == [
Expand All @@ -106,25 +117,25 @@ def test_ranges_or_baskets_to_arrays():
)["sample"] as sample:
branch = sample["i4"]

ranges_or_baskets = sample._names_entries_to_ranges_or_baskets(["i4"], 0, 30)
ranges_or_baskets = _names_entries_to_ranges_or_baskets(sample, ["i4"], 0, 30)
branchid_interpretation = {
id(branch): uproot4.interpret.numerical.AsDtype(">i4")
id(branch): uproot4.interpretation.numerical.AsDtype(">i4")
}
entry_start, entry_stop = (0, 30)
decompression_executor = uproot4.source.futures.TrivialExecutor()
interpretation_executor = uproot4.source.futures.TrivialExecutor()
array_cache = None
library = uproot4.interpret.library._libraries["np"]
library = uproot4.interpretation.library._libraries["np"]

output = sample._ranges_or_baskets_to_arrays(
output = {}
sample._ranges_or_baskets_to_arrays(
ranges_or_baskets,
branchid_interpretation,
entry_start,
entry_stop,
decompression_executor,
interpretation_executor,
array_cache,
library,
output,
)
assert output["i4"].tolist() == [
-15,
Expand Down Expand Up @@ -165,7 +176,7 @@ def test_branch_array():
skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root")
)["sample/i4"] as branch:
assert branch.array(
uproot4.interpret.numerical.AsDtype(">i4"), library="np"
uproot4.interpretation.numerical.AsDtype(">i4"), library="np"
).tolist() == [
-15,
-14,
Expand Down Expand Up @@ -200,7 +211,7 @@ def test_branch_array():
]

assert branch.array(
uproot4.interpret.numerical.AsDtype(">i4"),
uproot4.interpretation.numerical.AsDtype(">i4"),
entry_start=3,
entry_stop=-5,
library="np",
Expand Down Expand Up @@ -230,7 +241,7 @@ def test_branch_array():
]

assert branch.array(
uproot4.interpret.numerical.AsDtype(">i4"),
uproot4.interpretation.numerical.AsDtype(">i4"),
entry_start=3,
entry_stop=-5,
interpretation_executor=uproot4.decompression_executor,
Expand Down Expand Up @@ -261,7 +272,7 @@ def test_branch_array():
]

with pytest.raises(ValueError):
branch.array(uproot4.interpret.numerical.AsDtype(">i8"), library="np")
branch.array(uproot4.interpretation.numerical.AsDtype(">i8"), library="np")


def test_cache():
Expand All @@ -276,17 +287,17 @@ def test_cache():
)
i4 = f["sample/i4"]
assert list(f.array_cache) == []
i4.array(uproot4.interpret.numerical.AsDtype(">i4"), library="np")
i4.array(uproot4.interpretation.numerical.AsDtype(">i4"), library="np")
assert list(f.array_cache) == [
"db4be408-93ad-11ea-9027-d201a8c0beef:/sample:i4:AsDtype(Bi4(),Li4()):0-30:np"
]

with pytest.raises(OSError):
i4.array(
uproot4.interpret.numerical.AsDtype(">i4"), entry_start=3, library="np"
uproot4.interpretation.numerical.AsDtype(">i4"), entry_start=3, library="np"
)

i4.array(uproot4.interpret.numerical.AsDtype(">i4"), library="np")
i4.array(uproot4.interpretation.numerical.AsDtype(">i4"), library="np")


def test_pandas():
Expand All @@ -295,7 +306,7 @@ def test_pandas():
skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root")
)["sample/i4"] as branch:
series = branch.array(
uproot4.interpret.numerical.AsDtype(">i4"),
uproot4.interpretation.numerical.AsDtype(">i4"),
entry_start=3,
entry_stop=-5,
interpretation_executor=uproot4.decompression_executor,
Expand Down
Loading

0 comments on commit 59841bb

Please sign in to comment.