Skip to content

Commit

Permalink
Support output column allocation within a table function (#341)
Browse files Browse the repository at this point in the history
* Add support for kTableFunctionSpecifiedParameter and set_output_row_size
* Add byval metadata to module
* update rbc_test.yml
  • Loading branch information
guilhermeleobas authored Sep 7, 2021
1 parent cacf447 commit 3c9d3fa
Show file tree
Hide file tree
Showing 14 changed files with 176 additions and 67 deletions.
26 changes: 6 additions & 20 deletions .github/workflows/rbc_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,6 @@ on:
pull_request:
branches:
- master
# workflow_dispatch:
# inputs:
# debug_enabled:
# description: 'Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)'
# required: false
# default: false

jobs:
lint:
Expand Down Expand Up @@ -65,15 +59,13 @@ jobs:
shell: bash -l {0}
run: |
conda env config vars set MAMBA_NO_BANNER=1
- name: Create rbc test environment v${{ matrix.python-version }}
shell: bash -l {0}
run: |
cat .conda/environment.yml > rbc_test.yaml
echo " - numba=${{ matrix.numba-version }}" >> rbc_test.yaml
echo " - python=${{ matrix.python-version }}" >> rbc_test.yaml
mamba env create --file=rbc_test.yaml -n rbc
- name: rbc conda config
shell: bash -l {0}
run: mamba run -n rbc conda config --show
Expand All @@ -82,12 +74,10 @@ jobs:
shell: bash -l {0}
run: |
mamba run -n rbc conda list
- name: Develop rbc
shell: bash -l {0}
run: |
mamba run -n rbc python setup.py develop
- name: Run rbc tests
shell: bash -l {0}
env:
Expand Down Expand Up @@ -209,10 +199,6 @@ jobs:
run: |
mamba run -n rbc python setup.py develop
# - name: Setup tmate session
# uses: mxschmitt/action-tmate@v3
# if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.debug_enabled }}

- name: Run rbc tests [docker]
shell: bash -l {0}
if: matrix.os == 'ubuntu-latest' && matrix.omniscidb-from == 'docker'
Expand Down Expand Up @@ -248,16 +234,16 @@ jobs:
test -f omniscidb.pid && kill -INT `cat omniscidb.pid`
sleep 5
- name: Show Omniscidb server output [conda]
shell: bash -l {0}
if: matrix.os == 'ubuntu-latest' && matrix.omniscidb-from == 'conda'
run: |
cat omniscidb-output.txt
- name: Show Omniscidb docker logs on failure [docker]
shell: bash -l {0}
timeout-minutes: 2
if: ${{ failure() && matrix.os == 'ubuntu-latest' && matrix.omniscidb-from == 'docker' }}
run: |
mamba run -n docker docker-compose logs --no-color --tail=10000 -f -t \> omniscidb-docker.log
cat omniscidb-docker.log
- name: Show Omniscidb server output [conda]
shell: bash -l {0}
if: matrix.os == 'ubuntu-latest' && matrix.omniscidb-from == 'conda'
run: |
cat omniscidb-output.txt
1 change: 1 addition & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ Externals
externals.cmath
externals.libdevice
externals.macros
externals.omniscidb
externals.stdio


Expand Down
11 changes: 4 additions & 7 deletions rbc/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,26 @@
rbc-specific errors and warnings.
"""

import numba
numba_errors = numba.core.errors


class OmnisciServerError(Exception):
"""
Launch when OmnisciDB server raises a runtime error that RBC knows
Raised when OmnisciDB server raises a runtime error that RBC knows
how to interpret.
"""
pass


class UnsupportedError(numba_errors.UnsupportedError):
class UnsupportedError(Exception):
"""
Launch when an attempt is to use a feature that is not supported
Raised when an attempt to use a feature that is not supported
for a given target.
"""
pass


class ForbiddenNameError(Exception):
"""
Launch when the user defines a function with name
Raised when the user defines a function with name
in a blacklist. For more info, see:
https://github.com/xnd-project/rbc/issues/32
"""
Expand Down
3 changes: 2 additions & 1 deletion rbc/extension_functions.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ values. Same comments apply as for TExtArgumentType in above. */
enum TOutputBufferSizeType {
kConstant,
kUserSpecifiedConstantParameter,
kUserSpecifiedRowMultiplier
kUserSpecifiedRowMultiplier,
kTableFunctionSpecifiedParameter
}

struct TUserDefinedFunction {
Expand Down
41 changes: 41 additions & 0 deletions rbc/externals/omniscidb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""External functions defined by the OmniSciDB server
"""


import functools
from rbc import irutils
from rbc.errors import UnsupportedError
from rbc.targetinfo import TargetInfo
from numba.core import extending, types as nb_types
from llvmlite import ir


@extending.intrinsic
def set_output_row_size(typingctx, set_output_row_size):
"""``set_output_row_size`` sets the row size of output Columns and
allocates the corresponding column buffers
.. note::
``set_output_row_size`` is available only for CPU target and OmniSciDB v5.7 or newer
"""
# void is declared as 'none' in Numba and 'none' is converted to a void* (int8*). See:
# https://github.com/numba/numba/blob/6881dfe3883d1344014ea16185ed87de4b75b9a1/numba/core/types/__init__.py#L95
sig = nb_types.void(nb_types.int64)

def codegen(context, builder, sig, args):
target_info = TargetInfo()
if target_info.software[1][:3] < (5, 7, 0):
msg = 'set_output_row_size is only available in OmniSciDB 5.7 or newer'
raise UnsupportedError(msg)

fnty = ir.FunctionType(ir.VoidType(), [ir.IntType(64)])
fn = irutils.get_or_insert_function(builder.module, fnty, name="set_output_row_size")
assert fn.is_declaration
builder.call(fn, args) # don't return anything

return sig, codegen


# fix docstring for intrinsics
for __func in (set_output_row_size,):
functools.update_wrapper(__func, __func._defn)
10 changes: 6 additions & 4 deletions rbc/externals/stdio.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@ def cg_fflush(builder):

@extending.intrinsic
def fflush(typingctx):
"""fflush that can be called from Numba jit-decorated functions.
"""``fflush`` that can be called from Numba jit-decorated functions.
Note: fflush is available only for CPU target.
.. note::
``fflush`` is available only for CPU target.
"""
sig = nb_types.void(nb_types.void)

Expand All @@ -37,9 +38,10 @@ def codegen(context, builder, signature, args):

@extending.intrinsic
def printf(typingctx, format_type, *args):
"""printf that can be called from Numba jit-decorated functions.
"""``printf`` that can be called from Numba jit-decorated functions.
Note: printf is available only for CPU target.
.. note::
``printf`` is available only for CPU target.
"""

if isinstance(format_type, nb_types.StringLiteral):
Expand Down
12 changes: 12 additions & 0 deletions rbc/irtools.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@


int32_t = ir.IntType(32)
int1_t = ir.IntType(1)


def get_called_functions(library, funcname=None):
Expand Down Expand Up @@ -345,6 +346,16 @@ def compile_instance(func, sig,
return fname


def add_byval_metadata(main_library):
module = ir.Module()
flag_name = "pass_column_arguments_by_value"
mflags = module.add_named_metadata('llvm.module.flags')
override_flag = int32_t(4)
flag = module.add_metadata([override_flag, flag_name, int1_t(0)])
mflags.add(flag)
main_library.add_ir_module(module)


def compile_to_LLVM(functions_and_signatures,
target_info: TargetInfo,
pipeline_class=compiler.Compiler,
Expand Down Expand Up @@ -400,6 +411,7 @@ def compile_to_LLVM(functions_and_signatures,
succesful_fids.append(fid)
function_names.append(fname)

add_byval_metadata(main_library)
main_library._optimize_final_module()

# Remove unused defined functions and declarations
Expand Down
2 changes: 1 addition & 1 deletion rbc/libfuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class OmnisciDB(Library):
name = 'omniscidb'

_function_names = list('''
allocate_varlen_buffer
allocate_varlen_buffer set_output_row_size
'''.strip().split())


Expand Down
36 changes: 20 additions & 16 deletions rbc/omnisci_backend/omnisci_buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@ class OmnisciBufferType(typesystem.Type):
"""
# When True, buffer type arguments are passed by value to
# functions [not recommended].
pass_by_value = False
@property
def pass_by_value(self):
return False

@classmethod
def preprocess_args(cls, args):
Expand Down Expand Up @@ -450,22 +452,25 @@ def codegen(context, builder, sig, args):


@extending.intrinsic
def omnisci_array_is_null_(typingctx, T, elem):
sig = types.boolean(T, elem)
def omnisci_buffer_idx_is_null_(typingctx, col_var, row_idx):
T = col_var.eltype
sig = types.boolean(col_var, row_idx)

target_info = TargetInfo()
null_value = target_info.null_values[f'{T.dtype}']
null_value = target_info.null_values[str(T)]
# The server sends numbers as unsigned values rather than signed ones.
# Thus, 129 should be read as -127 (overflow). See rbc issue #254
bitwidth = T.dtype.bitwidth
null_value = np.dtype(f'uint{bitwidth}').type(null_value).view(f'int{bitwidth}')
nv = ir.Constant(ir.IntType(bitwidth), null_value)
nv = ir.Constant(ir.IntType(T.bitwidth), null_value)

def codegen(context, builder, signature, args):
_, elem = args
if isinstance(T.dtype, types.Float):
elem = builder.bitcast(elem, nv.type)
return builder.icmp_signed('==', elem, nv)
ptr, index = args
data = builder.extract_value(builder.load(ptr), [0])
res = builder.load(builder.gep(data, [index]))

if isinstance(T, types.Float):
res = builder.bitcast(res, nv.type)

return builder.icmp_signed('==', res, nv)

return sig, codegen

Expand All @@ -475,19 +480,18 @@ def codegen(context, builder, signature, args):
# column is null
@extending.overload_method(BufferPointer, 'is_null')
def omnisci_buffer_is_null(x, row_idx=None):
T = x.eltype
if isinstance(x, BufferPointer):
if cgutils.is_nonelike(row_idx):
def impl(x, row_idx=None):
return omnisci_buffer_is_null_(x)
else:
def impl(x, row_idx=None):
return omnisci_array_is_null_(T, x[row_idx])
return omnisci_buffer_idx_is_null_(x, row_idx)
return impl


@extending.intrinsic
def omnisci_array_set_null_(typingctx, arr, row_idx):
def omnisci_buffer_idx_set_null(typingctx, arr, row_idx):
T = arr.eltype
sig = types.none(arr, row_idx)

Expand All @@ -504,7 +508,7 @@ def codegen(context, builder, signature, args):
fnop = context.typing_context.resolve_value_type(omnisci_buffer_ptr_setitem_)
setitem_sig = types.none(arr, row_idx, T)
# register the intrinsic in the typing ctx
fnop.get_call_type(context, setitem_sig.args, {})
fnop.get_call_type(context.typing_context, setitem_sig.args, {})
intrinsic = context.get_function(fnop, setitem_sig)

data, index = args
Expand All @@ -526,6 +530,6 @@ def impl(x, row_idx=None):
return omnisci_buffer_set_null_(x)
else:
def impl(x, row_idx=None):
return omnisci_array_set_null_(x, row_idx)
return omnisci_buffer_idx_set_null(x, row_idx)
return impl
return impl
5 changes: 4 additions & 1 deletion rbc/omnisci_backend/omnisci_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@
class OmnisciColumnType(OmnisciBufferType):
"""Omnisci Column type for RBC typesystem.
"""
pass_by_value = True
@property
def pass_by_value(self):
omnisci_version = TargetInfo().software[1][:3]
return omnisci_version < (5, 7, 0)


class OmnisciOutputColumnType(OmnisciColumnType):
Expand Down
25 changes: 11 additions & 14 deletions rbc/omniscidb.py
Original file line number Diff line number Diff line change
Expand Up @@ -839,7 +839,8 @@ def _make_udtf(self, caller, orig_sig, sig):
sizer_map = dict(
ConstantParameter='kUserSpecifiedConstantParameter',
RowMultiplier='kUserSpecifiedRowMultiplier',
Constant='kConstant')
Constant='kConstant',
SpecifiedParameter='kTableFunctionSpecifiedParameter')

unspecified = object()
inputArgTypes = []
Expand Down Expand Up @@ -889,20 +890,16 @@ def _make_udtf(self, caller, orig_sig, sig):
inputArgTypes.append(atype)
consumed_index += 1
if sizer is None:
sizer = 'kConstant'
if sizer == 'kConstant':
sizer_index = get_literal_return(
caller.func, verbose=self.debug)
sizer_index = get_literal_return(caller.func, verbose=self.debug)
if sizer_index is None:
raise TypeError(
f'Table function `{caller.func.__name__}`'
' has no sizing parameter nor has'
' it `return <literal value>` statement')
if sizer_index < 0:
raise ValueError(
f'Table function `{caller.func.__name__}`'
' sizing parameter must be non-negative'
f' integer (got {sizer_index})')
sizer = 'kTableFunctionSpecifiedParameter'
else:
sizer = 'kConstant'
if sizer_index < 0:
raise ValueError(
f'Table function `{caller.func.__name__}`'
' sizing parameter must be non-negative'
f' integer (got {sizer_index})')
sizer_type = (thrift.TOutputBufferSizeType
._NAMES_TO_VALUES[sizer])
return thrift.TUserDefinedTableFunction(
Expand Down
4 changes: 2 additions & 2 deletions rbc/tests/test_omnisci_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,11 +544,11 @@ def fn_issue197_bool(x):
def test_issue109(omnisci):

@omnisci('double[](int32)')
def foo(size):
def issue109(size):
a = Array(5, 'double')
for i in range(5):
a[i] = nb_types.double(i)
return a

_, result = omnisci.sql_execute('select foo(3);')
_, result = omnisci.sql_execute('select issue109(3);')
assert list(result) == [([0.0, 1.0, 2.0, 3.0, 4.0],)]
2 changes: 1 addition & 1 deletion rbc/tests/test_omnisci_array_null.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def test_array_null(omnisci, col):

expected = {
'b': [(ARRAY_NULL,), (ARRAY_NOT_NULL,), (ARRAY_NULL,),
(ARRAY_NOT_NULL,), (ARRAY_NULL,)],
(ARRAY_IDX_IS_NULL,), (ARRAY_NULL,)],
'i1': [(ARRAY_NULL,), (ARRAY_IDX_IS_NULL,), (ARRAY_NULL,),
(ARRAY_NOT_NULL,), (ARRAY_NULL,)],
'i2': [(ARRAY_NOT_NULL,), (ARRAY_NULL,), (ARRAY_NOT_NULL,),
Expand Down
Loading

0 comments on commit 3c9d3fa

Please sign in to comment.