Skip to content

Commit

Permalink
Merge pull request #2359 from opensafely-core/cast-bool-to-int
Browse files Browse the repository at this point in the history
Support as_int on bool series
  • Loading branch information
rebkwok authored Jan 17, 2025
2 parents 32e1603 + e4e4ce0 commit 6673e53
Show file tree
Hide file tree
Showing 9 changed files with 96 additions and 18 deletions.
16 changes: 16 additions & 0 deletions docs/includes/generated_docs/language__series.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,14 @@ status = status_code.map_values(
```
</div>

<div class="attr-heading" id="BoolPatientSeries.as_int">
<tt><strong>as_int</strong>()</tt>
<a class="headerlink" href="#BoolPatientSeries.as_int" title="Permanent link">🔗</a>
</div>
<div markdown="block" class="indent">
Return each value in this Boolean series as 1 (True) or 0 (False).
</div>

</div>


Expand Down Expand Up @@ -243,6 +251,14 @@ status = status_code.map_values(
```
</div>

<div class="attr-heading" id="BoolEventSeries.as_int">
<tt><strong>as_int</strong>()</tt>
<a class="headerlink" href="#BoolEventSeries.as_int" title="Permanent link">🔗</a>
</div>
<div markdown="block" class="indent">
Return each value in this Boolean series as 1 (True) or 0 (False).
</div>

<div class="attr-heading" id="BoolEventSeries.count_distinct_for_patient">
<tt><strong>count_distinct_for_patient</strong>()</tt>
<a class="headerlink" href="#BoolEventSeries.count_distinct_for_patient" title="Permanent link">🔗</a>
Expand Down
29 changes: 29 additions & 0 deletions docs/includes/generated_docs/specs.md
Original file line number Diff line number Diff line change
Expand Up @@ -2558,6 +2558,33 @@ returns the following patient series:



### 7.2 Convert a boolean value to an integer


#### 7.2.1 Bool as int
Booleans are converted to 0 (False) or 1 (True).

This example makes use of a patient-level table named `p` containing the following data:

| patient|b1 |
| - | - |
| 1|T |
| 2| |
| 3|F |

```python
p.b1.as_int()
```
returns the following patient series:

| patient | value |
| - | - |
| 1|1 |
| 2| |
| 3|0 |



## 8 Operations on integer series


Expand Down Expand Up @@ -3030,6 +3057,8 @@ returns the following patient series:


#### 11.1.3 Case with boolean column
Note that individual boolean columns can be converted to the integers 0 and 1 using
the `as_int()` method.

This example makes use of a patient-level table named `p` containing the following data:

Expand Down
12 changes: 2 additions & 10 deletions ehrql/measures/calculate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from ehrql.measures.measures import get_all_group_by_columns
from ehrql.query_model.column_specs import ColumnSpec, get_column_spec_from_series
from ehrql.query_model.nodes import Case, Dataset, Function, Value, get_series_type
from ehrql.query_model.nodes import Dataset, Function, Value, get_series_type
from ehrql.query_model.transforms import substitute_parameters


Expand Down Expand Up @@ -207,15 +207,7 @@ def series_as_int(series):
if series_type is int:
return series
elif series_type is bool:
# TODO: This is definitely not the most efficient way to do this. We should
# extend the `CastToInt` operation to apply to boolean as well.
return Case(
{
Function.EQ(series, Value(True)): Value(1),
Function.EQ(series, Value(False)): Value(0),
},
default=None,
)
return Function.CastToInt(series)
else:
assert False

Expand Down
15 changes: 8 additions & 7 deletions ehrql/query_engines/trino.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,14 @@ def date_difference_in_days(self, end, start):
)

def cast_to_int(self, value):
# Trino's casting to int rounds away from zero. We need to round towards zero for
# consistency with other query engines.
rounded_towards_zero = sqlalchemy.case(
(value > 0, SQLFunction("FLOOR", value)),
else_=SQLFunction("CEILING", value),
)
return sqlalchemy.cast(rounded_towards_zero, sqlalchemy.Integer)
if isinstance(value.type, sqlalchemy.Numeric):
# Trino's casting to int rounds away from zero. We need to round towards zero for
# consistency with other query engines.
value = sqlalchemy.case(
(value > 0, SQLFunction("FLOOR", value)),
else_=SQLFunction("CEILING", value),
)
return sqlalchemy.cast(value, sqlalchemy.Integer)

def truedivide(self, lhs, rhs):
rhs_null_if_zero = SQLFunction("NULLIF", rhs, 0.0, type_=sqlalchemy.Float)
Expand Down
10 changes: 10 additions & 0 deletions ehrql/query_language.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,16 @@ def __invert__(self: T) -> T:
"""
return _apply(qm.Function.Not, self)

# Typing-only overloads: a patient-level series is annotated as returning an
# `IntPatientSeries`, an event-level series as returning an `IntEventSeries`.
@overload
def as_int(self: "PatientSeries") -> "IntPatientSeries": ...
@overload
def as_int(self: "EventSeries") -> "IntEventSeries": ...
def as_int(self):
    """
    Return each value in this Boolean series as 1 (True) or 0 (False).
    """
    # Delegates to `_apply` with the query model's `CastToInt` function; the
    # same operation is reused for booleans rather than a dedicated node.
    # NOTE(review): the accompanying spec test expects a missing value to stay
    # missing (None) rather than become 0 -- confirm for every query engine.
    return _apply(qm.Function.CastToInt, self)


class BoolPatientSeries(BoolFunctions, PatientSeries):
_type = bool
Expand Down
2 changes: 1 addition & 1 deletion ehrql/query_model/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ class FloorDivide(Series[int]):

# Casting numeric types
class CastToInt(Series[int]):
source: Series[Numeric]
source: Series[Numeric] | Series[bool]

class CastToFloat(Series[float]):
source: Series[Numeric]
Expand Down
25 changes: 25 additions & 0 deletions tests/spec/bool_series_ops/test_conversion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from ..tables import p


title = "Convert a boolean value to an integer"

# Fixture rows for the patient-level table `p`, keyed by patient id:
# patient 1 has b1 = T, patient 3 has b1 = F, and patient 2 has no b1 value.
table_data = {
    p: """
| b1
--+----
1 | T
2 |
3 | F
""",
}


# Spec test for `as_int()` on a boolean patient series. Expected results are
# keyed by patient id: T -> 1, F -> 0, and the patient with no `b1` value
# (patient 2) -> None.
# NOTE(review): the docstring and the series expression below appear verbatim
# in the generated spec docs, so keep the call shape as-is -- confirm against
# the docs generator before restructuring.
def test_bool_as_int(spec_test):
    """
    Booleans are converted to 0 (False) or 1 (True).
    """
    spec_test(
        table_data,
        p.b1.as_int(),
        {1: 1, 2: None, 3: 0},
    )
4 changes: 4 additions & 0 deletions tests/spec/case_expressions/test_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ def test_case_with_default(spec_test):


def test_case_with_boolean_column(spec_test):
"""
Note that individual boolean columns can be converted to the integers 0 and 1 using
the `as_int()` method.
"""
table_data = {
p: """
| i1 | b1
Expand Down
1 change: 1 addition & 0 deletions tests/spec/toc.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
],
"bool_series_ops": [
"test_logical_ops",
"test_conversion",
],
"int_series_ops": [
"test_arithmetic_ops",
Expand Down

0 comments on commit 6673e53

Please sign in to comment.