Skip to content

Commit

Permalink
WIP for some real EBCDIC tests
Browse files Browse the repository at this point in the history
  • Loading branch information
NWilson committed Jan 7, 2025
1 parent 35eafc9 commit 4b9f908
Show file tree
Hide file tree
Showing 29 changed files with 2,108 additions and 1,131 deletions.
21 changes: 21 additions & 0 deletions .github/workflows/dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,27 @@ jobs:
- name: Test
run: bazelisk test //... --enable_runfiles --incompatible_strict_action_env --test_output=all

ebcdic:
# Tests the full support for EBCDIC on a non-EBCDIC platform, using a
# hardcoded EBCDIC-1047 codepage.
name: EBCDIC
runs-on: ubuntu-24.04
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true

- name: Configure
# TODO: Add the new CFLAGS_GCC when merging with the other PR
run: cmake -DPCRE2_SUPPORT_JIT=OFF -DPCRE2_SUPPORT_UNICODE=OFF -DPCRE2_EBCDIC=ON -DPCRE2_EBCDIC_IGNORING_COMPILER=ON -DPCRE2_DEBUG=ON -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DCMAKE_BUILD_TYPE=Release -B build

- name: Build
run: cd build && make -j3

- name: Test
run: cd build && ../RunTest

heron:
# Job to verify that the tasks performed by PrepareRelease have been done. It is
# the committer's responsibility (currently) to run PrepareRelease themselves when
Expand Down
64 changes: 55 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,10 @@ set(

set(PCRE2_EBCDIC_NL25 OFF CACHE BOOL "Use 0x25 as EBCDIC NL character instead of 0x15; implies EBCDIC.")

set(PCRE2_EBCDIC_IGNORING_COMPILER OFF CACHE BOOL "Force EBCDIC 1047 using numeric literals rather than C character literals; implies EBCDIC.")

option(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF)

set(
PCRE2_LINK_SIZE
"2"
Expand Down Expand Up @@ -572,13 +576,42 @@ if(NEWLINE_DEFAULT STREQUAL "")
)
endif()

set(REBUILD_CHARTABLES OFF)
if(PCRE2_REBUILD_CHARTABLES)
set(REBUILD_CHARTABLES ON)
endif()

set(EBCDIC OFF)
if(PCRE2_EBCDIC)
set(EBCDIC 1)
set(EBCDIC ON)
endif()

if(PCRE2_EBCDIC_NL25)
set(EBCDIC 1)
set(EBCDIC_NL25 1)
set(EBCDIC ON)
set(EBCDIC_NL25 ON)
endif()

if(PCRE2_EBCDIC_IGNORING_COMPILER)
set(EBCDIC ON)
set(EBCDIC_IGNORING_COMPILER ON)
endif()

# Constraints that apply whenever EBCDIC is enabled:
#  - PCRE2 cannot handle EBCDIC and UTF in the same build (it would need to use
#    different character constants depending on the mode), so Unicode support
#    must not be requested.
#  - EBCDIC cannot be used with the 16-bit and 32-bit libraries.
#  - Unless EBCDIC_IGNORING_COMPILER is set, REBUILD_CHARTABLES is switched on
#    as well.
if(EBCDIC AND PCRE2_SUPPORT_UNICODE)
  message(FATAL_ERROR "Support for EBCDIC and Unicode cannot be enabled at the same time")
endif()

if(EBCDIC AND (PCRE2_BUILD_PCRE2_16 OR PCRE2_BUILD_PCRE2_32))
  message(FATAL_ERROR "EBCDIC support is available only for the 8-bit library")
endif()

if(EBCDIC AND NOT EBCDIC_IGNORING_COMPILER)
  set(REBUILD_CHARTABLES ON)
endif()

# Output files
Expand Down Expand Up @@ -652,8 +685,7 @@ endif()

# Character table generation

option(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF)
if(PCRE2_REBUILD_CHARTABLES)
if(REBUILD_CHARTABLES)
add_executable(pcre2_dftables src/pcre2_dftables.c)
add_custom_command(
OUTPUT ${PROJECT_BINARY_DIR}/pcre2_chartables.c
Expand All @@ -663,8 +695,12 @@ if(PCRE2_REBUILD_CHARTABLES)
COMMENT "Generating character tables (pcre2_chartables.c) for current locale"
VERBATIM
)
else()
elseif(NOT PCRE2_EBCDIC)
configure_file(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.dist ${PROJECT_BINARY_DIR}/pcre2_chartables.c COPYONLY)
elseif(PCRE2_EBCDIC_NL25)
configure_file(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.ebcdic-1047-nl25 ${PROJECT_BINARY_DIR}/pcre2_chartables.c COPYONLY)
else()
configure_file(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.ebcdic-1047-nl15 ${PROJECT_BINARY_DIR}/pcre2_chartables.c COPYONLY)
endif()

# Source code
Expand Down Expand Up @@ -1342,9 +1378,19 @@ if(PCRE2_SHOW_REPORT)
message(STATUS " Newline char/sequence ............. : ${PCRE2_NEWLINE}")
message(STATUS " \\R matches only ANYCRLF ........... : ${PCRE2_SUPPORT_BSR_ANYCRLF}")
message(STATUS " \\C is disabled .................... : ${PCRE2_NEVER_BACKSLASH_C}")
message(STATUS " EBCDIC coding ..................... : ${PCRE2_EBCDIC}")
message(STATUS " EBCDIC coding with NL=0x25 ........ : ${PCRE2_EBCDIC_NL25}")
message(STATUS " Rebuild char tables ............... : ${PCRE2_REBUILD_CHARTABLES}")

# Work out the NL code to show in the report: "n/a" when EBCDIC is off,
# otherwise 0x25 when EBCDIC_NL25 is set and 0x15 when it is not.
set(EBCDIC_NL_CODE "n/a")
if(EBCDIC)
  if(EBCDIC_NL25)
    set(EBCDIC_NL_CODE "0x25")
  else()
    set(EBCDIC_NL_CODE "0x15")
  endif()
endif()

# EBCDIC and REBUILD_CHARTABLES are the derived settings, which may differ
# from the PCRE2_* cache options the user passed in.
message(STATUS " EBCDIC coding ..................... : ${EBCDIC}")
message(STATUS " EBCDIC code for NL ................ : ${EBCDIC_NL_CODE}")
message(STATUS " EBCDIC coding ignoring compiler ... : ${PCRE2_EBCDIC_IGNORING_COMPILER}")
message(STATUS " Rebuild char tables ............... : ${REBUILD_CHARTABLES}")

message(STATUS " Internal link size ................ : ${PCRE2_LINK_SIZE}")
message(STATUS " Maximum variable lookbehind ....... : ${PCRE2_MAX_VARLOOKBEHIND}")
message(STATUS " Parentheses nest limit ............ : ${PCRE2_PARENS_NEST_LIMIT}")
Expand Down
17 changes: 16 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -362,9 +362,21 @@ src/pcre2_chartables.c: pcre2_dftables$(EXEEXT)
rm -f $@
./pcre2_dftables$(EXEEXT) $@
else
if WITH_EBCDIC
if WITH_EBCDIC_NL25
src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.ebcdic-1047-nl25
rm -f $@
$(LN_S) $(abs_srcdir)/src/pcre2_chartables.c.ebcdic-1047-nl25 $(abs_builddir)/src/pcre2_chartables.c
else # WITH_EBCDIC_NL25
src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.ebcdic-1047-nl15
rm -f $@
$(LN_S) $(abs_srcdir)/src/pcre2_chartables.c.ebcdic-1047-nl15 $(abs_builddir)/src/pcre2_chartables.c
endif # WITH_EBCDIC_NL25
else # WITH_EBCDIC
src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.dist
rm -f $@
$(LN_S) $(abs_srcdir)/src/pcre2_chartables.c.dist $(abs_builddir)/src/pcre2_chartables.c
endif # WITH_EBCDIC
endif # WITH_REBUILD_CHARTABLES

BUILT_SOURCES = src/pcre2_chartables.c
Expand Down Expand Up @@ -460,7 +472,10 @@ endif # WITH_PCRE2_32
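# The pcre2_chartables.c.dist file is the default (ASCII) version of
# pcre2_chartables.c, used unless --enable-rebuild-chartables is specified or
# EBCDIC support is enabled. The pcre2_chartables.c.ebcdic-1047-* files are the
# corresponding defaults for EBCDIC 1047.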

EXTRA_DIST += src/pcre2_chartables.c.dist
EXTRA_DIST += \
src/pcre2_chartables.c.dist \
src/pcre2_chartables.c.ebcdic-1047-nl15 \
src/pcre2_chartables.c.ebcdic-1047-nl25
CLEANFILES += src/pcre2_chartables.c

# The JIT compiler lives in a separate directory, but its files are #included
Expand Down
25 changes: 20 additions & 5 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -309,11 +309,22 @@ library. They are also documented in the pcre2build man page.

--enable-ebcdic --disable-unicode

This automatically implies --enable-rebuild-chartables (see above). However,
when PCRE2 is built this way, it always operates in EBCDIC. It cannot support
both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25,
which specifies that the code value for the EBCDIC NL character is 0x25
instead of the default 0x15.
This automatically implies --enable-rebuild-chartables (see above), in order
to ensure that you have the correct default character tables for your system's
codepage. There is an exception when you set --enable-ebcdic-ignoring-compiler
(see below), which allows using a default set of EBCDIC 1047 character tables
rather than forcing use of --enable-rebuild-chartables.

When PCRE2 is built with EBCDIC support, it always operates in EBCDIC. A
single build cannot combine EBCDIC with either ASCII or UTF-8/16/32.

There is a second option, --enable-ebcdic-nl25, which specifies that the code
value for the EBCDIC NL character is 0x25 instead of the default 0x15.

There is a third option, --enable-ebcdic-ignoring-compiler, which disregards
the compiler's codepage for determining the numeric value of C character
constants such as 'z', and instead forces PCRE2 to use numeric constants for
the EBCDIC 1047 codepage.

. If you specify --enable-debug, additional debugging code is included in the
build. This option is intended for use by the PCRE2 maintainers.
Expand Down Expand Up @@ -822,6 +833,10 @@ The distribution should contain the files listed below.
src/pcre2_chartables.c.dist a default set of character tables that assume
ASCII coding; unless --enable-rebuild-chartables is
specified, used by copying to pcre2_chartables.c
src/pcre2_chartables.c.ebcdic-1047-{nl15,nl25} a default set of character
tables for EBCDIC 1047; used if
--enable-ebcdic-ignoring-compiler is specified
without --enable-rebuild-chartables

src/pcre2posix.c )
src/pcre2_auto_possess.c )
Expand Down
132 changes: 79 additions & 53 deletions RunTest
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,9 @@
# very much more stack than normal. In environments where the stack can be
# set at runtime, -bigstack sets a gigantic stack.
#
# There are two special cases where only one argument is allowed:
#
# If the first and only argument is "ebcdic", the script runs the special
# EBCDIC test that can be useful for checking certain EBCDIC features, even
# when run in an ASCII environment. PCRE2 must be built with EBCDIC support for
# this test to be run.
#
# If the script is obeyed as "RunTest list", a list of available tests is
# output, but none of them are run.
# Special case where only one argument is allowed:
# - If the script is invoked as "RunTest list", a list of available tests is
# output, but none of them are run.
###############################################################################

# Define test titles in variables so that they can be output as a list. Some
Expand Down Expand Up @@ -92,6 +86,7 @@ title26="Test 26: Unicode property tests (compatible with Perl >= 5.38)"
title27="Test 27: Auto-generated unicode property tests"
maxtest=27
titleheap="Test 'heap': Environment-specific heap tests"
titleEBC="Test 'ebcdic': EBCDIC-specific tests"

if [ $# -eq 1 -a "$1" = "list" ]; then
echo $title0
Expand Down Expand Up @@ -124,6 +119,7 @@ if [ $# -eq 1 -a "$1" = "list" ]; then
echo $title27
echo ""
echo $titleheap
echo $titleEBC
echo ""
echo "Numbered tests are automatically run if nothing selected."
echo "Named tests must be explicitly selected."
Expand Down Expand Up @@ -357,6 +353,12 @@ support32=$?
$sim $pcre2test -C backslash-C >/dev/null
supportBSC=$?

# Check if compiled in EBCDIC mode, and whether we have EBCDIC I/O. The exit
# status of each "pcre2test -C" query is saved immediately after the command
# (before anything else can overwrite $?). Later code treats ebcdic=1 as
# "EBCDIC support compiled in" and ebcdic_io=0 as "pcre2test uses ASCII input".
$sim $pcre2test -C ebcdic >/dev/null
ebcdic=$?
$sim $pcre2test -C ebcdic-io >/dev/null
ebcdic_io=$?

# Initialize all bitsizes skipped

test8=skip
Expand Down Expand Up @@ -435,34 +437,38 @@ if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \
$do24 = no -a $do25 = no -a $do26 = no -a $do27 = no -a \
$doheap = no -a $doebcdic = no \
]; then
do0=yes
do1=yes
do2=yes
do3=yes
do4=yes
do5=yes
do6=yes
do7=yes
do8=yes
do9=yes
do10=yes
do11=yes
do12=yes
do13=yes
do14=yes
do15=yes
do16=yes
do17=yes
do18=yes
do19=yes
do20=yes
do21=yes
do22=yes
do23=yes
do24=yes
do25=yes
do26=yes
do27=yes
if [ $ebcdic -ne 0 ] ; then
  # "pcre2test -C ebcdic" returned non-zero: select only the EBCDIC test.
  doebcdic=yes
else
  # Otherwise select every numbered test, 0 to 27 inclusive.
  for _rt_n in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 \
               24 25 26 27; do
    eval "do${_rt_n}=yes"
  done
fi
fi

# Handle any explicit skips at this stage, so that an argument list may consist
Expand Down Expand Up @@ -921,24 +927,44 @@ for bmode in "$test8" "$test16" "$test32"; do
checkresult $? heap-$bits ""
fi

# End of loop for 8/16/32-bit tests
done


# ------ Special EBCDIC Test -------
# Special EBCDIC tests

if [ $doebcdic = yes ] ; then
$sim $valgrind $pcre2test -C ebcdic >/dev/null
ebcdic=$?
if [ $ebcdic -ne 1 ] ; then
echo "Cannot run EBCDIC tests: EBCDIC support not compiled"
exit 1
# Runs the EBCDIC test file through pcre2test, once in normal mode and once
# with -dfa. This needs a pcre2test built with EBCDIC support ($ebcdic = 1)
# that still uses ASCII input ($ebcdic_io = 0); in any other configuration the
# script prints an explanation and exits with status 1.
if [ $doebcdic = yes ] ; then
  echo $titleEBC
  if [ $ebcdic -ne 1 ] ; then
    echo "Cannot run EBCDIC tests: EBCDIC support not compiled"
    exit 1
  fi
  if [ $ebcdic_io -eq 0 ] ; then
    # Our testdata files are in ASCII, and the pcre2test program is using
    # ASCII input: all easy.
    for opt in "" "-dfa"; do
      $sim $valgrind $pcre2test -q $setstack $bmode $opt $testdata/testinputEBC >testtry
      checkresult $? EBC "$opt"
    done
  else
    # The lines below print commands for the user to run by hand. $bmode and
    # $cf are expanded on purpose so the printed commands are concrete; $bmode
    # is printed exactly as it is passed to pcre2test above (no extra dash).
    # The "$?" must be escaped so that it is printed literally instead of
    # being replaced by the exit status of the preceding echo.
    echo "Cannot run EBCDIC tests:"
    echo " Ironically we do not support running these tests on an actual"
    echo " EBCDIC system. The testdata files shipped with PCRE2 are in ASCII."
    echo " You may be able to run the tests manually if you know which"
    echo " EBCDIC codepage you used when compiling PCRE2, and then convert"
    echo " the testdata to match. For example, if the C compiler used to build"
    echo " PCRE2 was using IBM-1047:"
    echo ""
    echo " iconv -f ISO8859-1 -t IBM-1047 <testdata/testinputEBC >testinputEBC-native"
    echo " pcre2test -q $bmode testinputEBC-native >testoutputEBC-native"
    echo " [ \$? -eq 0 ] || echo 'pcre2test failed'"
    echo " iconv -f IBM-1047 -t ISO8859-1 <testoutputEBC-native >testoutputEBC-ascii"
    echo " $cf testdata/testoutputEBC testoutputEBC-ascii"
    echo ""
    echo "This is speculative. The PCRE2 maintainers do not have access to an"
    echo "EBCDIC system to test this. Please report back if you try it."
    exit 1
  fi
fi
for opt in "" "-dfa"; do
$sim $valgrind $pcre2test -q $opt $testdata/testinputEBC >testtry
checkresult $? EBC "$opt"
done
fi

# End of loop for 8/16/32-bit tests
done


# Clean up local working files
Expand Down
1 change: 1 addition & 0 deletions config-cmake.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#cmakedefine BSR_ANYCRLF 1
#cmakedefine EBCDIC 1
#cmakedefine EBCDIC_NL25 1
#cmakedefine EBCDIC_IGNORING_COMPILER 1
#cmakedefine HEAP_MATCH_RECURSE 1
#cmakedefine NEVER_BACKSLASH_C 1

Expand Down
Loading

0 comments on commit 4b9f908

Please sign in to comment.