Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

In standalone Python extension, actually use the CMake build #441

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[build-system]
requires = ["setuptools", "wheel", "cmake"]
requires = ["setuptools", "wheel", "cmake>=3.17"]
14 changes: 9 additions & 5 deletions python/BuildStandalone.cmake
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
cmake_minimum_required(VERSION 3.1)
cmake_minimum_required(VERSION 3.17)
project(kenlm-standalone)
find_package(Python ${PYTHON_VERSION_STRING} REQUIRED COMPONENTS Development)

file(GLOB
KENLM_PYTHON_STANDALONE_SRCS
Expand All @@ -11,12 +13,14 @@ file(GLOB
list(FILTER KENLM_PYTHON_STANDALONE_SRCS EXCLUDE REGEX ".*main.cc")
list(FILTER KENLM_PYTHON_STANDALONE_SRCS EXCLUDE REGEX ".*test.cc")

add_library(

Python_add_library(
kenlm
SHARED
${KENLM_PYTHON_STANDALONE_SRCS}
)
MODULE WITH_SOABI
${KENLM_PYTHON_STANDALONE_SRCS} python/kenlm.cpp
)

set_property(TARGET kenlm PROPERTY CXX_STANDARD 11)
target_include_directories(kenlm PRIVATE ${PROJECT_SOURCE_DIR})
target_compile_definitions(kenlm PRIVATE KENLM_MAX_ORDER=${KENLM_MAX_ORDER})

Expand Down
78 changes: 19 additions & 59 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,25 @@
from setuptools import setup, Extension
from setuptools.command.build_ext import build_ext as _build_ext
import glob
import platform
import subprocess
import os
import sys
import re
from pathlib import Path

# Does g++ compile and link a trivial program with this header and library?
def compile_test(header, library):
dummy_path = os.path.join(os.path.dirname(__file__), "dummy")
command = "bash -c \"g++ -include " + header + " -l" + library + " -x c++ - <<<'int main() {}' -o " + dummy_path + " >/dev/null 2>/dev/null && rm " + dummy_path + " 2>/dev/null\""
return os.system(command) == 0
VERSION = "0.2.0"

# Take the default max_order from the MAX_ORDER environment variable ("6" if unset)
max_order = os.getenv("MAX_ORDER", "6")

# Override it with a --max_order argument found in sys.argv (e.g. passed via --config-settings), if present
is_max_order = [s for s in sys.argv if "--max_order" in s]
for element in is_max_order:
max_order = re.split('[= ]',element)[1]
max_order = re.split("[= ]", element)[1]
sys.argv.remove(element)

print(f"Will build with KenLM max_order set to {max_order}")

FILES = glob.glob('util/*.cc') + glob.glob('lm/*.cc') + glob.glob('util/double-conversion/*.cc') + glob.glob('python/*.cc')
FILES = [fn for fn in FILES if not (fn.endswith('main.cc') or fn.endswith('test.cc'))]

# We don't need -std=c++11, but Python seems to be compiled with it now. See https://github.com/kpu/kenlm/issues/86
ARGS = ['-O3', '-DNDEBUG', '-DKENLM_MAX_ORDER='+max_order, '-std=c++11']
INCLUDE_PATHS = []

if platform.system() == 'Linux':
LIBS = ['stdc++', 'rt']
ARGS.append('-DHAVE_CLOCKGETTIME')
elif platform.system() == 'Darwin':
LIBS = ['c++']
else:
LIBS = []

# Attempted fix for https://github.com/kpu/kenlm/issues/186 and https://github.com/kpu/kenlm/issues/197
if platform.system() == 'Darwin':
ARGS += ["-stdlib=libc++", "-mmacosx-version-min=10.7"]
INCLUDE_PATHS.append("/usr/local/include")

if compile_test('zlib.h', 'z'):
ARGS.append('-DHAVE_ZLIB')
LIBS.append('z')

if compile_test('bzlib.h', 'bz2'):
ARGS.append('-DHAVE_BZLIB')
LIBS.append('bz2')

if compile_test('lzma.h', 'lzma'):
ARGS.append('-DHAVE_XZLIB')
LIBS.append('lzma')


class build_ext(_build_ext):
def run(self):
Expand All @@ -68,31 +31,31 @@ def run(self):
+ ", ".join(e.name for e in self.extensions)
)

ext_dir = str(Path(self.get_ext_fullpath('libkenlm')).absolute().parent)
ext_dir = str(Path(self.get_ext_fullpath("kenlm")).absolute().parent)
source_dir = str(Path(__file__).absolute().parent)

cmake_args = [
"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + ext_dir,
f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={ext_dir}",
"-DBUILD_SHARED_LIBS=ON",
"-DBUILD_PYTHON_STANDALONE=ON",
f"-DKENLM_MAX_ORDER={max_order}",
f"-DCMAKE_PROJECT_VERSION={VERSION}",
]
cfg = "Debug" if self.debug else "Release"
build_args = ["--config", cfg]

if platform.system() == "Windows":
cmake_args += [
"-DCMAKE_WINDOWS_EXPORT_ALL_SYMBOLS=ON",
"-DCMAKE_RUNTIME_OUTPUT_DIRECTORY_{}={}".format(cfg.upper(), ext_dir),
"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}".format(cfg.upper(), ext_dir),
"-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY_{}={}".format(cfg.upper(), ext_dir),
f"-DCMAKE_RUNTIME_OUTPUT_DIRECTORY_{cfg.upper()}={ext_dir}",
f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={ext_dir}",
f"-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY_{cfg.upper()}={ext_dir}",
]
if sys.maxsize > 2**32:
cmake_args += ["-A", "x64"]
build_args += ["--", "/m"]
# build_args += ["--", "/m"]
else:
cmake_args += ["-DCMAKE_BUILD_TYPE=" + cfg]
build_args += ["--", "-j4"]
cmake_args.append(f"-DCMAKE_BUILD_TYPE={cfg}")

env = os.environ.copy()
env["CXXFLAGS"] = '{} -fPIC -DVERSION_INFO=\\"{}\\"'.format(
Expand All @@ -105,25 +68,22 @@ def run(self):
["cmake", source_dir] + cmake_args, cwd=self.build_temp, env=env
)
subprocess.check_call(
["cmake", "--build", "."] + build_args, cwd=self.build_temp
["cmake", "--build", ".", "-j", "4"] + build_args, cwd=self.build_temp
)

return _build_ext.run(self)


ext_modules = [
Extension(name='kenlm',
sources=FILES + ['python/kenlm.cpp'],
language='C++',
include_dirs=['.'] + INCLUDE_PATHS,
depends = ['python/BuildStandalone.cmake'],
libraries=LIBS,
extra_compile_args=ARGS),
Extension(
name="kenlm",
language="C++",
sources=[],
depends=["python/BuildStandalone.cmake"],
),
]

setup(
name='kenlm',
version='0.2.0',
name="kenlm",
version=VERSION,
ext_modules=ext_modules,
cmdclass={"build_ext": build_ext},
include_package_data=True,
Expand Down
6 changes: 3 additions & 3 deletions util/read_compressed.cc
Original file line number Diff line number Diff line change
Expand Up @@ -168,11 +168,11 @@ class GZip {

void SetOutput(void *to, std::size_t amount) {
stream_.next_out = static_cast<Bytef*>(to);
stream_.avail_out = std::min<std::size_t>(std::numeric_limits<uInt>::max(), amount);
stream_.avail_out = std::min<std::size_t>((std::numeric_limits<uInt>::max)(), amount);
}

void SetInput(const void *base, std::size_t amount) {
assert(amount < static_cast<std::size_t>(std::numeric_limits<uInt>::max()));
assert(amount < static_cast<std::size_t>((std::numeric_limits<uInt>::max)()));
stream_.next_in = const_cast<Bytef*>(static_cast<const Bytef*>(base));
stream_.avail_in = amount;
}
Expand Down Expand Up @@ -225,7 +225,7 @@ class BZip {

void SetOutput(void *base, std::size_t amount) {
stream_.next_out = static_cast<char*>(base);
stream_.avail_out = std::min<std::size_t>(std::numeric_limits<unsigned int>::max(), amount);
stream_.avail_out = std::min<std::size_t>((std::numeric_limits<unsigned int>::max)(), amount);
}

void SetInput(const void *base, std::size_t amount) {
Expand Down