Skip to content

Commit

Permalink
[#670] Allow lengthof operator for strings and bytes
Browse files Browse the repository at this point in the history
  • Loading branch information
Mi-La committed Dec 2, 2024
1 parent 4f2375f commit 2cfab61
Show file tree
Hide file tree
Showing 12 changed files with 341 additions and 20 deletions.
43 changes: 36 additions & 7 deletions compiler/core/src/zserio/ast/Expression.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package zserio.ast;

import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
Expand Down Expand Up @@ -1156,17 +1157,45 @@ private void evaluateIsSetOperator()

/**
 * Evaluates the lengthof operator applied to operand1.
 *
 * The result is always an integer expression. Its value is determined as follows:
 * - array: the constant array length when the length expression has a known integer value, otherwise
 *   the full varsize range
 * - string: the number of UTF-8 bytes when the string value is known, otherwise the full varsize range
 * - bytes: the full varsize range
 *
 * Throws ParserException when the operand is neither an array, a string nor bytes.
 */
private void evaluateLengthOfOperator()
{
    expressionType = ExpressionType.INTEGER;

    if (operand1.symbolInstantiation instanceof ArrayInstantiation)
    {
        final Expression lengthExpr =
                ((ArrayInstantiation)operand1.symbolInstantiation).getLengthExpression();
        if (lengthExpr != null && lengthExpr.getIntegerValue() != null)
        {
            // the array length is a known constant
            expressionIntegerValue = new ExpressionIntegerValue(lengthExpr.getIntegerValue());
        }
        else
        {
            // the array length is not known here, only its bounds are
            expressionIntegerValue = new ExpressionIntegerValue(
                    VarIntegerType.VARSIZE_LOWER_BOUND, VarIntegerType.VARSIZE_UPPER_BOUND);
        }
    }
    else if (operand1.expressionType == ExpressionType.STRING)
    {
        if (operand1.expressionStringValue != null)
        {
            // the string value is known, its length is counted in UTF-8 bytes
            expressionIntegerValue = new ExpressionIntegerValue(BigInteger.valueOf(
                    operand1.expressionStringValue.getBytes(StandardCharsets.UTF_8).length));
        }
        else
        {
            expressionIntegerValue = new ExpressionIntegerValue(
                    VarIntegerType.VARSIZE_LOWER_BOUND, VarIntegerType.VARSIZE_UPPER_BOUND);
        }
    }
    else if (operand1.expressionType == ExpressionType.BYTES)
    {
        expressionIntegerValue = new ExpressionIntegerValue(
                VarIntegerType.VARSIZE_LOWER_BOUND, VarIntegerType.VARSIZE_UPPER_BOUND);
    }
    else
    {
        throw new ParserException(
                operand1, "'" + operand1.text + "' is not supported by lengthof operator!");
    }
}

private void evaluateValueOfOperator()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package zserio.runtime;

import java.math.BigInteger;
import java.nio.charset.StandardCharsets;

/**
* The implementation of all Zserio built-in operators called from code generated by Zserio.
Expand Down Expand Up @@ -101,4 +102,15 @@ public static short numBits(BigInteger numValues)

return (short)calcValue.bitLength();
}

/**
 * Computes how many bytes the given string occupies when it is encoded in UTF-8.
 *
 * @param stringValue String to measure.
 *
 * @return Length of the UTF-8 encoding of the given string in bytes.
 */
public static int lengthOf(String stringValue)
{
    final byte[] utf8Bytes = stringValue.getBytes(StandardCharsets.UTF_8);

    return utf8Bytes.length;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -80,4 +80,14 @@ public void numBits()
assertEquals(63, BuiltInOperators.numBits(BigInteger.ONE.shiftLeft(63)));
assertEquals(64, BuiltInOperators.numBits(BigInteger.ONE.shiftLeft(63).add(BigInteger.ONE)));
}

@Test
void lengthOf()
{
    // expected lengths are numbers of UTF-8 bytes, not numbers of characters
    final String[] inputs = {"", "abc", "€", "$", "€$"};
    final int[] expectedLengths = {0, 3, 3, 1, 4};

    for (int i = 0; i < inputs.length; ++i)
    {
        assertEquals(expectedLengths[i], BuiltInOperators.lengthOf(inputs[i]));
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import zserio.ast.EnumItem;
import zserio.ast.EnumType;
import zserio.ast.Expression;
import zserio.ast.Expression.ExpressionType;
import zserio.ast.Field;
import zserio.ast.Function;
import zserio.ast.Package;
Expand Down Expand Up @@ -229,7 +230,14 @@ public UnaryExpressionFormatting getFunctionCall(Expression expr)
@Override
public UnaryExpressionFormatting getLengthOf(Expression expr)
{
    // string operands are wrapped in a call to the runtime helper BuiltInOperators.lengthOf,
    // all other operands use the ".length" suffix
    if (expr.op1().getExprType() == ExpressionType.STRING)
    {
        return new UnaryExpressionFormatting("zserio.runtime.BuiltInOperators.lengthOf(", ")");
    }
    else
    {
        return new UnaryExpressionFormatting("", ".length");
    }
}

@Override
Expand Down
11 changes: 11 additions & 0 deletions compiler/extensions/python/runtime/src/zserio/builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,14 @@ def numbits(num_values: int) -> int:
return 1

return (num_values - 1).bit_length()


def lengthof_string(string: str) -> int:
    """
    Computes how many bytes the given string occupies when it is encoded in UTF-8.

    :param string: String to measure.
    :returns: Length of the UTF-8 encoding of the given string in bytes.
    """

    encoded = string.encode("utf-8")
    return len(encoded)
9 changes: 8 additions & 1 deletion compiler/extensions/python/runtime/tests/test_builtin.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import unittest

from zserio.builtin import isset, numbits
from zserio.builtin import isset, numbits, lengthof_string


class BuiltinOperatorsTest(unittest.TestCase):
Expand Down Expand Up @@ -76,3 +76,10 @@ def test_numbits(self):
self.assertEqual(33, numbits((1 << 32) + 1))
self.assertEqual(63, numbits(1 << 63))
self.assertEqual(64, numbits((1 << 63) + 1))

def test_lengthof_string(self):
    # expected lengths are numbers of UTF-8 bytes, not numbers of characters
    expectations = [
        (0, ""),
        (3, "abc"),
        (3, "€"),
        (1, "$"),
        (4, "€$"),
    ]
    for expected_length, text in expectations:
        self.assertEqual(expected_length, lengthof_string(text))
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import zserio.ast.EnumItem;
import zserio.ast.EnumType;
import zserio.ast.Expression;
import zserio.ast.Expression.ExpressionType;
import zserio.ast.Field;
import zserio.ast.Function;
import zserio.ast.Package;
Expand Down Expand Up @@ -175,7 +176,15 @@ public UnaryExpressionFormatting getFunctionCall(Expression expr)
@Override
public UnaryExpressionFormatting getLengthOf(Expression expr)
{
    // string operands are wrapped in a call to zserio.builtin.lengthof_string (which needs the zserio
    // package to be imported), all other operands use the Python built-in len()
    if (expr.op1().getExprType() == ExpressionType.STRING)
    {
        importCollector.importPackage("zserio");
        return new UnaryExpressionFormatting("zserio.builtin.lengthof_string(", ")");
    }
    else
    {
        return new UnaryExpressionFormatting("len(", ")");
    }
}

@Override
Expand Down
17 changes: 10 additions & 7 deletions doc/ZserioLanguageOverview.md
Original file line number Diff line number Diff line change
Expand Up @@ -882,20 +882,23 @@ The bitwise complement `~` is defined for integer and bitmask expressions.

#### lengthof Operator

The `lengthof` operator may be applied to an array member and returns the actual length (i.e. number
of elements of an array. Thus, given `int32 a[5]`, the expression `lengthof` a evaluates to `5`. This is not
particularly useful for fixed or variable length arrays, but it is the only way to refer to the length of an
implicit length array.
The `lengthof` operator may be applied to array members, bytes types or strings and returns the actual length
(i.e. the number of elements of an array, or the number of bytes of a bytes value or of a string). Thus, given
`int32 a[5]`, the expression `lengthof(a)` evaluates to `5`. This is not particularly useful for fixed or
variable length arrays, but it is the only way to refer to the length of an auto length array.

> For strings, the `lengthof` operator returns the number of bytes of the string encoded using UTF-8.
> For example, the length of the string "€" is `3`, because it is encoded as three bytes in UTF-8.

**Example**
```
struct LengthOfOperator
{
implicit uint8 implicitArray[];
uint8 autoArray[];
function uint32 getLengthOfImplicitArray()
function uint32 getLengthOfAutoArray()
{
return lengthof(implicitArray);
return lengthof(autoArray);
}
};
```
Expand Down
2 changes: 1 addition & 1 deletion test/data
79 changes: 77 additions & 2 deletions test/extensions/language/expressions/cpp/LengthOfOperatorTest.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#include "expressions/lengthof_operator/LengthOfFunctions.h"
#include "expressions/lengthof_operator/STR_CONSTANT.h"
#include "gtest/gtest.h"
#include "zserio/RebindAlloc.h"
#include "zserio/SerializeUtil.h"

namespace expressions
{
Expand All @@ -10,8 +12,9 @@ namespace lengthof_operator
using allocator_type = LengthOfFunctions::allocator_type;
template <typename T>
using vector_type = zserio::vector<T, allocator_type>;
using string_type = zserio::string<allocator_type>;

TEST(LengthOfOperatorTest, GetLengthOfFixedArray)
TEST(LengthOfOperatorTest, getLengthOfFixedArray)
{
LengthOfFunctions lengthOfFunctions;
const size_t fixedArrayLength = 10;
Expand All @@ -20,7 +23,7 @@ TEST(LengthOfOperatorTest, GetLengthOfFixedArray)
ASSERT_EQ(fixedArrayLength, lengthOfFunctions.funcGetLengthOfFixedArray());
}

TEST(LengthOfOperatorTest, GetLengthOfVariableArray)
TEST(LengthOfOperatorTest, getLengthOfVariableArray)
{
LengthOfFunctions lengthOfFunctions;
const size_t variableArrayLength = 11;
Expand All @@ -30,5 +33,77 @@ TEST(LengthOfOperatorTest, GetLengthOfVariableArray)
ASSERT_EQ(variableArrayLength, lengthOfFunctions.funcGetLengthOfVariableArray());
}

// lengthof applied to the string constant must match the size of the constant in bytes.
TEST(LengthOfOperatorTest, getLengthOfStrConstant)
{
    LengthOfFunctions lengthOfFunctions;
    const auto constantSize = STR_CONSTANT.size();
    ASSERT_EQ(11, constantSize); // the size is counted in bytes (UTF-8)
    ASSERT_EQ(constantSize, lengthOfFunctions.funcGetLengthOfStrConstant());
}

// lengthof applied to a string literal must match the size of the literal in bytes.
TEST(LengthOfOperatorTest, getLengthOfLiteral)
{
    LengthOfFunctions lengthOfFunctions;
    const auto literalSize = zserio::makeStringView("€literal").size();
    ASSERT_EQ(10, literalSize); // the size is counted in bytes (UTF-8)
    ASSERT_EQ(literalSize, lengthOfFunctions.funcGetLengthOfLiteral());
}

// The default value of the literalLengthField must match the size of the literal in bytes.
TEST(LengthOfOperatorTest, literalLengthFieldDefault)
{
    LengthOfFunctions lengthOfFunctions;
    const auto literalSize = zserio::makeStringView("€literal").size();
    ASSERT_EQ(10, literalSize); // the size is counted in bytes (UTF-8)
    ASSERT_EQ(literalSize, lengthOfFunctions.getLiteralLengthField());
}

// lengthof applied to a string field must match the size of the stored string in bytes.
TEST(LengthOfOperatorTest, getLengthOfString)
{
    string_type text("€test");
    LengthOfFunctions lengthOfFunctions;
    lengthOfFunctions.setStrField(text);
    const auto textSize = text.size();
    ASSERT_EQ(7, textSize); // the size is counted in bytes (UTF-8)
    ASSERT_EQ(textSize, lengthOfFunctions.funcGetLengthOfString());
}

// lengthof applied to a bytes field must match the number of stored bytes.
TEST(LengthOfOperatorTest, getLengthOfBytes)
{
    auto payload = vector_type<uint8_t>{{0x00, 0x01, 0x02}};
    LengthOfFunctions lengthOfFunctions;
    lengthOfFunctions.setBytesField(payload);
    const auto payloadSize = payload.size();
    ASSERT_EQ(payloadSize, lengthOfFunctions.funcGetLengthOfBytes());
}

// lengthof applied to the first element of a string array must match the size of that element in bytes.
TEST(LengthOfOperatorTest, getLengthOfFirstStrInArray)
{
    // the first element must be a multi-byte character, an empty string could never have the size 3
    auto strArray = vector_type<string_type>{{string_type("€"), string_type("$")}};
    LengthOfFunctions lengthOfFunctions;
    lengthOfFunctions.setStrArray(strArray);
    ASSERT_EQ(3, strArray.at(0).size()); // check that it's length in bytes (UTF-8)
    ASSERT_EQ(strArray.at(0).size(), lengthOfFunctions.funcGetLengthOfFirstStrInArray());
}

// lengthof applied to the first element of a bytes array must match the number of bytes in that element.
TEST(LengthOfOperatorTest, getLengthOfFirstBytesInArray)
{
    auto payloads = vector_type<vector_type<uint8_t>>{{{{0x00, 0x01}}, {{}}}};
    LengthOfFunctions lengthOfFunctions;
    lengthOfFunctions.setBytesArray(payloads);
    const auto firstPayloadSize = payloads.at(0).size();
    ASSERT_EQ(firstPayloadSize, lengthOfFunctions.funcGetLengthOfFirstBytesInArray());
}

// A fully populated object must be equal to itself after a serialize / deserialize round trip.
TEST(LengthOfOperatorTest, writeRead)
{
    LengthOfFunctions written;

    // array fields
    written.setFixedArray(
            vector_type<uint8_t>{{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09}});
    written.setNumElements(3);
    written.setVariableArray(vector_type<uint8_t>{{0x03, 0x02, 0x01}});

    // string and bytes fields
    written.setStrField("longer than constant");
    written.setBytesField(vector_type<uint8_t>{{0x00, 0x01, 0x02}});
    written.setStrArray(vector_type<string_type>());
    written.setBytesArray(vector_type<vector_type<uint8_t>>());

    auto serialized = zserio::serialize(written);
    auto restored = zserio::deserialize<LengthOfFunctions>(serialized);
    ASSERT_EQ(written, restored);
}

} // namespace lengthof_operator
} // namespace expressions
Loading

0 comments on commit 2cfab61

Please sign in to comment.